def update_metric(user, org, name_id):
    """
    Update an existing Metric for an Org.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the metric belongs to.
    :param name_id: the metric's id or name.
    :raises NotFoundError: if no matching metric exists for this org.
    :raises RequestError: if the database commit fails.
    :return: the updated metric, serialized.
    """
    m = fetch_by_id_or_field(Metric, 'name', name_id, org_id=org.id)
    if not m:
        raise NotFoundError(
            'Metric "{}" does not yet exist for Org "{}"'
            .format(name_id, org.name))

    # get the request data
    req_data = request_data()

    # filter out any non-columns. iterate over a *copy* of the keys:
    # popping from a dict while iterating its live key view raises
    # RuntimeError on python 3.
    columns = get_table_columns(Metric)
    for k in list(req_data.keys()):
        if k not in columns:
            req_data.pop(k, None)

    # don't ever overwrite these:
    # (pop with a default never raises, so no membership check is needed)
    for k in ['id', 'recipe_id', 'name', 'org_id', 'created', 'updated']:
        req_data.pop(k, None)

    # update fields
    for k, v in req_data.items():
        setattr(m, k, v)

    try:
        db.session.add(m)
        db.session.commit()
    except Exception as e:
        # format the exception itself: Exception.message was removed
        # in python 3; str(e) works on both 2 and 3.
        raise RequestError("Error updating Metric: {}".format(e))
    return jsonify(m)
def update_author(user, org, author_id):
    """
    Update an author, looked up by id or (upper-cased) name.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the author belongs to.
    :param author_id: the author's id or name.
    :raises NotFoundError: if no matching author exists for this org.
    :return: the updated author, serialized.
    """
    a = fetch_by_id_or_field(Author, 'name', author_id,
                             org_id=org.id, transform='upper')
    if not a:
        # NOTE: fixed a stray trailing quote in this message.
        raise NotFoundError(
            'Author with ID/Name "{}" does not exist.'
            .format(author_id))

    req_data = request_data()
    cols = get_table_columns(Author)

    # drop non-columns and protected fields. iterate over a *copy* of
    # the keys: popping from a dict while iterating its live key view
    # raises RuntimeError on python 3.
    for k in list(req_data.keys()):
        if k not in cols or k in ['id', 'org_id']:
            req_data.pop(k, None)

    for k, v in req_data.items():
        # author names are stored upper-case.
        if k == 'name' and v:
            v = v.upper()
        setattr(a, k, v)

    db.session.add(a)
    db.session.commit()
    return jsonify(a)
def create_author(user, org):
    """
    Create an author for an Org.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the author will belong to.
    :raises RequestError: if 'name' is missing or the commit fails.
    :return: the created author, serialized.
    """
    req_data = request_data()
    cols = get_table_columns(Author)
    if 'name' not in req_data:
        raise RequestError("A 'name' is required to create an Author.")

    # drop non-columns and protected fields. iterate over a *copy* of
    # the keys: popping from a dict while iterating its live key view
    # raises RuntimeError on python 3.
    for k in list(req_data.keys()):
        if k not in cols or k in ['id', 'org_id']:
            req_data.pop(k, None)

        # upper-case: author names are stored upper-case.
        elif k == 'name':
            req_data[k] = req_data[k].upper()

    a = Author(org_id=org.id, **req_data)
    try:
        db.session.add(a)
        db.session.commit()
    except Exception as e:
        # format the exception itself: Exception.message was removed
        # in python 3; str(e) works on both 2 and 3.
        raise RequestError(
            'There was an error creating this Author: {}'.format(e))
    return jsonify(a)
def update_author(user, org, author_id):
    """
    Update an author, looked up strictly by numeric id.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the author belongs to.
    :param author_id: the author's id.
    :raises NotFoundError: if no matching author exists for this org.
    :return: the updated author, serialized.
    """
    a = Author.query\
        .filter_by(id=author_id, org_id=org.id)\
        .first()
    if not a:
        # NOTE: fixed a stray trailing quote in this message.
        raise NotFoundError(
            'Author with ID "{}" does not exist.'.format(author_id))

    req_data = request_data()
    cols = get_table_columns(Author)

    # drop non-columns and protected fields. iterate over a *copy* of
    # the keys: popping from a dict while iterating its live key view
    # raises RuntimeError on python 3.
    for k in list(req_data.keys()):
        if k not in cols or k in ['id', 'org_id']:
            req_data.pop(k, None)

    for k, v in req_data.items():
        setattr(a, k, v)

    db.session.add(a)
    db.session.commit()
    return jsonify(a)
def update_author(user, org, author_id):
    """
    Update an author, looked up by id or (upper-cased) name.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the author belongs to.
    :param author_id: the author's id or name.
    :raises NotFoundError: if no matching author exists for this org.
    :return: the updated author, serialized.
    """
    a = fetch_by_id_or_field(Author, 'name', author_id,
                             org_id=org.id, transform='upper')
    if not a:
        # NOTE: fixed a stray trailing quote in this message.
        raise NotFoundError(
            'Author with ID/Name "{}" does not exist.'.format(author_id))

    req_data = request_data()
    cols = get_table_columns(Author)

    # drop non-columns and protected fields. iterate over a *copy* of
    # the keys: popping from a dict while iterating its live key view
    # raises RuntimeError on python 3.
    for k in list(req_data.keys()):
        if k not in cols or k in ['id', 'org_id']:
            req_data.pop(k, None)

    for k, v in req_data.items():
        # author names are stored upper-case.
        if k == 'name' and v:
            v = v.upper()
        setattr(a, k, v)

    db.session.add(a)
    db.session.commit()
    return jsonify(a)
def create_author(user, org):
    """
    Create an author for an Org.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the author will belong to.
    :raises RequestError: if 'name' is missing or the commit fails.
    :return: the created author, serialized.
    """
    req_data = request_data()
    cols = get_table_columns(Author)
    if 'name' not in req_data:
        raise RequestError(
            "A 'name' is required to create an Author.")

    # drop non-columns and protected fields. iterate over a *copy* of
    # the keys: popping from a dict while iterating its live key view
    # raises RuntimeError on python 3.
    for k in list(req_data.keys()):
        if k not in cols or k in ['id', 'org_id']:
            req_data.pop(k, None)

        # upper-case: author names are stored upper-case.
        elif k == 'name':
            req_data[k] = req_data[k].upper()

    a = Author(org_id=org.id, **req_data)
    try:
        db.session.add(a)
        db.session.commit()
    except Exception as e:
        # format the exception itself: Exception.message was removed
        # in python 3; str(e) works on both 2 and 3.
        raise RequestError(
            'There was an error creating this Author: {}'
            .format(e))
    return jsonify(a)
def update_author(user, org, author_id):
    """
    Update an author, looked up strictly by numeric id.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the author belongs to.
    :param author_id: the author's id.
    :raises NotFoundError: if no matching author exists for this org.
    :return: the updated author, serialized.
    """
    a = Author.query\
        .filter_by(id=author_id, org_id=org.id)\
        .first()
    if not a:
        # NOTE: fixed a stray trailing quote in this message.
        raise NotFoundError(
            'Author with ID "{}" does not exist.'
            .format(author_id))

    req_data = request_data()
    cols = get_table_columns(Author)

    # drop non-columns and protected fields. iterate over a *copy* of
    # the keys: popping from a dict while iterating its live key view
    # raises RuntimeError on python 3.
    for k in list(req_data.keys()):
        if k not in cols or k in ['id', 'org_id']:
            req_data.pop(k, None)

    for k, v in req_data.items():
        setattr(a, k, v)

    db.session.add(a)
    db.session.commit()
    return jsonify(a)
def update_tag(user, org, tag_id):
    """
    Update an individual tag.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the tag belongs to.
    :param tag_id: the tag's id or slug.
    :raises NotFoundError: if no matching tag exists for this org.
    :raises RequestError: on invalid input or a failed commit (e.g. dupes).
    :return: the updated tag, serialized.
    """
    # fetch the tag object
    tag = fetch_by_id_or_field(Tag, 'slug', tag_id, org_id=org.id)
    if not tag:
        raise NotFoundError(
            'A Tag with ID {} does not exist'
            .format(tag_id))

    # fetch the request data.
    req_data = request_data()

    # check hex code
    if 'color' in req_data:
        validate_hex_code(req_data['color'])

    # check tag type
    if 'type' in req_data:
        validate_tag_types(req_data['type'])

        # if tag type is "impact" ensure a proper category and
        # level are included. use .get() so a missing key is reported
        # by the validator instead of raising an unhandled KeyError.
        if req_data['type'] == 'impact':
            validate_tag_categories(req_data.get('category'))
            validate_tag_levels(req_data.get('level'))

    # check if levels + categories are being assigned to
    # subject tags
    if tag.type == 'subject':
        if req_data.get('category') or req_data.get('level'):
            raise RequestError(
                'Categories and Levels can only be set for Impact Tags')

    # set org id
    req_data['org_id'] = org.id

    # filter out non-table columns. iterate over a *copy* of the keys:
    # popping from a dict while iterating its live key view raises
    # RuntimeError on python 3.
    columns = get_table_columns(Tag)
    for k in list(req_data.keys()):
        if k not in columns:
            req_data.pop(k)

    # update attributes
    for k, v in req_data.items():
        setattr(tag, k, v)
    db.session.add(tag)

    # check for dupes
    try:
        db.session.commit()
    except Exception as err:
        # format the exception itself: Exception.message was removed
        # in python 3; str(err) works on both 2 and 3.
        raise RequestError(str(err))
    return jsonify(tag)
def _clean():
    """
    Prepare raw event records in parallel, stash relational fields
    (tags / content items / links) in ``meta`` keyed by source_id,
    and store the column/meta-split records in ``events``.
    """
    prep_event = partial(
        _prepare, requires=requires, recipe=recipe,
        org_id=org_id, type='event')

    for record in p.imap_unordered(prep_event, data):
        # 'tags' is always removed from the record; its value only
        # serves as the fallback when 'tag_ids' is absent.
        legacy_tags = record.pop('tags', [])
        meta[record['source_id']] = {
            'tag_ids': record.pop('tag_ids', legacy_tags),
            'content_item_ids': record.pop('content_item_ids', []),
            'links': record.pop('links', []),
        }
        # split out meta fields, then index by source_id.
        record = _split_meta(record, get_table_columns(Event))
        events[record['source_id']] = record
def validate_fields(obj, fields=None, incl=None, suffix='to select by'):
    """
    Check a list of fields against a table's column names.

    :param obj: the model/table object to read columns from.
    :param fields: field names to validate (defaults to none).
    :param incl: extra column names to allow (defaults to none).
    :param suffix: trailing phrase for the error message.
    :raises RequestError: if any field is not a valid column name.
    """
    # avoid mutable default arguments; normalize None to empty lists.
    fields = fields or []
    incl = incl or []
    columns = get_table_columns(obj, incl)
    bad_fields = [field for field in fields if field not in columns]
    if bad_fields:
        if len(bad_fields) == 1:
            msg = 'is not a valid field name'
        else:
            msg = 'are not valid field names'
        raise RequestError("'{}' {} {}. Choose from: {}.".format(
            ', '.join(bad_fields), msg, suffix, ", ".join(columns)))
def _clean():
    """
    Run event preparation across the pool, splitting each prepared
    record into relational metadata (``meta``) and column/meta data
    (``events``), both keyed by source_id.
    """
    prep = partial(partial(_prepare, type='event'),
                   requires=requires, recipe=recipe, org_id=org_id)

    for rec in p.imap_unordered(prep, data):
        # 'tags' is unconditionally popped; it is only used as the
        # fallback when 'tag_ids' is missing.
        tags_fallback = rec.pop('tags', [])
        sid = rec['source_id']
        meta[sid] = dict(
            tag_ids=rec.pop('tag_ids', tags_fallback),
            content_item_ids=rec.pop('content_item_ids', []),
            links=rec.pop('links', []))

        # split out meta fields and add to the lookup.
        rec = _split_meta(rec, get_table_columns(Event))
        events[rec['source_id']] = rec
def _clean():
    """
    Prepare raw content-item records in parallel, keeping relational
    fields (authors / tags / links) in ``meta`` and the column/meta
    split records in ``cis``, both keyed by "url||type".
    """
    prep_item = partial(
        _prepare, requires=requires, recipe=recipe, org_id=org_id,
        type='content_item', extract=kw.get('extract', True))

    for record in p.imap_unordered(prep_item, data):
        # determine unique id from url + type.
        key = "{}||{}".format(record['url'], record['type'])

        # 'authors' / 'tags' are always removed; their values are the
        # fallbacks when the corresponding *_ids keys are absent.
        legacy_authors = record.pop('authors', [])
        legacy_tags = record.pop('tags', [])
        meta[key] = {
            'author_ids': record.pop('author_ids', legacy_authors),
            'tag_ids': record.pop('tag_ids', legacy_tags),
            'links': record.pop('links', []),
        }

        # split out meta fields and index the record.
        record = _split_meta(record, get_table_columns(ContentItem))
        cis[key] = record
def validate_fields(obj, fields=None, incl=None, suffix='to select by'):
    """
    Check a list of fields against a table's column names.

    :param obj: the model/table object to read columns from.
    :param fields: field names to validate (defaults to none).
    :param incl: extra column names to allow (defaults to none).
    :param suffix: trailing phrase for the error message.
    :raises RequestError: if any field is not a valid column name.
    """
    # avoid mutable default arguments; normalize None to empty lists.
    fields = fields or []
    incl = incl or []
    columns = get_table_columns(obj, incl)
    bad_fields = [field for field in fields if field not in columns]
    if bad_fields:
        if len(bad_fields) == 1:
            msg = 'is not a valid field name'
        else:
            msg = 'are not valid field names'
        raise RequestError(
            "'{}' {} {}. Choose from: {}."
            .format(', '.join(bad_fields), msg, suffix, ", ".join(columns)))
def ingest(obj, org_id, url_fields=['body'], requires=['url', 'type'],
           extract=True, kill_session=True):
    """
    Ingest a Content Item.

    Normalizes/validates the incoming dict, optionally runs cached
    article extraction on its url, splits relational fields (tags,
    authors) from column fields, then upserts a ContentItem keyed by
    (org_id, type, url) and associates tags and authors.

    :param obj: dict of content-item fields (mutated in place).
    :param org_id: fallback org id; an 'org_id' key in ``obj`` wins.
    :param url_fields: NOTE(review): currently unused in this body —
        the url-extraction code below is commented out.
    :param requires: keys that must be present in ``obj``.
    :param extract: when True, pull title/description/body from the
        extraction cache; when False, sanitize the provided fields.
    :param kill_session: close the session before returning.
    :raises RequestError: when extraction fails for the url.
    :return: the created or updated ContentItem.
    """
    # distinct session for this eventlet.
    session = gen_session()

    # check required fields
    ingest_util.check_requires(obj, requires, type='Content Item')

    # validate type
    validate_content_item_types(obj['type'])

    # check if the org_id is in the body
    # TODO: I don't think this is necessary.
    org_id = obj.pop('org_id', org_id)

    # get rid of ``id`` if it somehow got in here.
    obj.pop('id', None)

    # normalize the url
    obj['url'] = ingest_util.prepare_url(obj, 'url')

    # run article extraction.
    if extract:
        cache_response = extract_cache.get(url=obj['url'], type=obj['type'])
        if not cache_response:
            # make sure to kill this key so a bad result isn't reused.
            extract_cache.invalidate(url=obj['url'], type=obj['type'])
            raise RequestError(
                'Extraction failed on {type} - {url}'.format(**obj))
        # extraction succeeded
        else:
            data = cache_response.value
            obj.update(data)
    else:
        obj['title'] = ingest_util.prepare_str(obj, 'title')
        obj['description'] = ingest_util.prepare_str(obj, 'description')
        obj['body'] = ingest_util.prepare_str(obj, 'body')
        # NOTE(review): 'created' is run through prepare_str here,
        # while the event ingester uses prepare_date — confirm this
        # is intentional.
        obj['created'] = ingest_util.prepare_str(obj, 'created')
        if not obj['created']:
            obj.pop('created')

    # get thumbnail
    obj['thumbnail'] = ingest_util.prepare_thumbnail(obj, 'img_url')

    # split out tags_ids + authors + links.
    # note: 'authors' (names) are accepted alongside 'author_ids'.
    tag_ids = obj.pop('tag_ids', [])
    authors = obj.pop('author_ids', [])
    authors.extend(obj.pop('authors', []))  # accept names too
    # links = obj.pop('links', {})

    # determine content-item provenance
    obj = _content_item_provenance(obj, org_id)

    # split out meta fields
    obj = ingest_util.split_meta(obj, get_table_columns(ContentItem))

    # see if the content item already exists for (org, type, url).
    c = session.query(ContentItem)\
        .filter_by(org_id=org_id, type=obj['type'], url=obj['url'])\
        .first()

    # if not, create it
    if not c:
        # create content item
        c = ContentItem(org_id=org_id, **obj)

    # else, update it
    else:
        for k, v in obj.items():
            setattr(c, k, v)

    # extract urls and normalize urls asynchronously.
    # urls = ingest_util.extract_urls(
    #     obj,
    #     url_fields,
    #     source=data.get('url'),
    #     links=_links)

    # detect content_items
    # if len(_links):
    #     c = _associate_content_items(c, org_id, _links)

    # associate tags
    if len(tag_ids):
        c = _associate_tags(c, org_id, tag_ids, session)

    # associate authors (only append ones not already linked).
    if len(authors):
        _authors = _associate_authors(c, org_id, authors, session)
        for a in _authors:
            if a.id not in c.author_ids:
                c.authors.append(a)

    session.add(c)
    session.commit()
    if kill_session:
        session.close()
    return c
def event_update(user, org, event_id):
    """
    Modify an individual event.

    Associates impact tags and content items passed via 'tag_ids' /
    'content_item_ids', updates column fields from the request body,
    re-rolls content-summary metrics, and commits.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the event belongs to.
    :param event_id: the event's id.
    :raises NotFoundError: if the event does not exist for this org.
    :raises RequestError: for missing tags/content items or non-impact tags.
    :return: the modified event, serialized.
    """
    e = Event.query\
        .filter_by(id=event_id, org_id=org.id)\
        .first()
    if not e:
        raise NotFoundError(
            'An Event with ID {} does not exist.'
            .format(event_id))

    # get request data
    req_data = request_data()

    # fetch tag and content-item ids.
    tag_ids = listify_data_arg('tag_ids')
    content_item_ids = listify_data_arg('content_item_ids')

    # a list of content items to apply impact tags to.
    if len(tag_ids):
        tags = Tag.query\
            .filter_by(org_id=org.id)\
            .filter(Tag.id.in_(tag_ids))\
            .all()
        if not len(tags):
            raise RequestError(
                'Tag(s) with ID(s) {} do(es) not exist.'
                .format(tag_ids))
        for tag in tags:
            # validate tag
            if tag.type != 'impact':
                raise RequestError('Events can only be assigned Impact Tags.')
            # add it
            if tag.id not in e.tag_ids:
                e.tags.append(tag)

    if len(content_item_ids):
        content_items = ContentItem.query\
            .filter_by(org_id=org.id)\
            .filter(ContentItem.id.in_(content_item_ids))\
            .all()
        if not len(content_items):
            # bug fix: report the content item ids (was tag_ids).
            raise RequestError(
                'ContentItem(s) with ID(s) {} do(es) not exist.'
                .format(content_item_ids))
        # add content items
        for c in content_items:
            if c.id not in e.content_item_ids:
                e.content_items.append(c)

    # filter out any non-columns. iterate over a *copy* of the keys:
    # popping from a dict while iterating its live key view raises
    # RuntimeError on python 3.
    columns = get_table_columns(Event)
    for k in list(req_data.keys()):
        if k not in columns:
            req_data.pop(k)

    # update fields
    for k, v in req_data.items():
        setattr(e, k, v)

    # ensure no one sneakily/accidentally
    # updates an organization id
    e.org_id = org.id

    # update event metrics associated with these content item ids
    if len(e.content_item_ids):
        rollup_metric.content_summary_from_events(org, e.content_item_ids)

    # commit changes
    db.session.add(e)
    db.session.commit()

    # return modified event
    return jsonify(e)
def ingest(
        obj,
        org_id,
        org_domains,
        url_fields=['title', 'body', 'description'],
        requires=['title'],
        must_link=False,
        kill_session=True):
    """
    Ingest an Event.

    Validates and sanitizes the incoming dict, splits out relational
    fields (tags, content items, links), resolves provenance, then
    upserts an Event keyed by (org_id, source_id) and associates
    content items and tags.

    :param obj: dict of event fields (mutated in place).
    :param org_id: fallback org id; an 'org_id' key in ``obj`` wins.
    :param org_domains: domains used to normalize/filter links.
    :param url_fields: NOTE(review): not referenced in this body —
        presumably consumed upstream; confirm.
    :param requires: keys that must be present in ``obj``.
    :param must_link: when True, drop (return None) events that do
        not link to any content items.
    :param kill_session: close the session before returning.
    :raises RequestError: for an explicit "deleted" status on create.
    :raises UnprocessableEntityError: if the event exists and was
        previously deleted.
    :return: the created/updated Event, or None when must_link filters
        it out.  NOTE(review): the early ``return None`` path skips
        session.close() even when kill_session is True — confirm.
    """
    # distinct session for this eventlet.
    session = gen_session()

    has_content_items = False

    # check required fields
    ingest_util.check_requires(obj, requires, type='Event')

    # validate status
    if 'status' in obj:
        validate_event_status(obj['status'])
        if obj['status'] == 'deleted':
            raise RequestError(
                'You cannot create an Event with status "deleted."')

    # check if the org_id is in the body
    # TODO: I don't think this is necessary.
    org_id = obj.pop('org_id', org_id)

    # get rid of ``id`` if it somehow got in here.
    obj.pop('id', None)

    # normalize the url
    obj['url'] = ingest_util.prepare_url(obj, 'url')

    # sanitize creation date; drop the key entirely when empty.
    obj['created'] = ingest_util.prepare_date(obj, 'created')
    if not obj['created']:
        obj.pop('created')

    # sanitize text/html fields
    obj['title'] = ingest_util.prepare_str(obj, 'title', obj['url'])
    obj['description'] = ingest_util.prepare_str(
        obj, 'description', obj['url'])
    obj['body'] = ingest_util.prepare_str(obj, 'body', obj['url'])

    # get thumbnail
    obj['thumbnail'] = ingest_util.prepare_thumbnail(obj, 'img_url')

    # split out tags_ids + content_item_ids
    tag_ids = obj.pop('tag_ids', [])
    content_item_ids = obj.pop('content_item_ids', [])
    links = obj.pop('links', [])

    # determine event provenance
    obj = _event_provenance(obj, org_id, session)

    # split out meta fields
    obj = ingest_util.split_meta(obj, get_table_columns(Event))

    # see if the event already exists.
    e = session.query(Event)\
        .filter_by(org_id=org_id)\
        .filter_by(source_id=obj['source_id'])\
        .first()

    # if not, create it
    if not e:
        # create event
        e = Event(org_id=org_id, **obj)

    # else, update it
    else:
        # if it's deleted, issue a message.
        if e.status == 'deleted':
            raise UnprocessableEntityError(
                'Event {} already exists and has been previously deleted.'
                .format(e.id))
        for k, v in obj.items():
            setattr(e, k, v)

    # extract urls and normalize urls asynchronously.
    links = ingest_util.prepare_links(links, org_domains)

    # detect content_items
    if len(links):
        e, has_content_items = _associate_content_items(
            e, org_id, links, content_item_ids, session)

    # associate tags
    if len(tag_ids):
        e = _associate_tags(e, org_id, tag_ids, session)

    # dont commit event if we're only looking
    # for events that link to content_items
    if not has_content_items and must_link:
        return None

    session.add(e)
    session.commit()
    if kill_session:
        session.close()
    return e
def event_update(user, org, event_id):
    """
    Modify an individual event.

    Associates impact tags and content items passed via 'tag_ids' /
    'content_item_ids', updates column fields from the request body,
    and commits.

    :param user: the authenticated user (unused here, kept for route signature).
    :param org: the Org the event belongs to.
    :param event_id: the event's id.
    :raises NotFoundError: if the event does not exist for this org.
    :raises RequestError: for missing tags/content items or non-impact tags.
    :return: the modified event, serialized.
    """
    e = Event.query\
        .filter_by(id=event_id, org_id=org.id)\
        .first()
    if not e:
        raise NotFoundError(
            'An Event with ID {} does not exist.'.format(event_id))

    # get request data
    req_data = request_data()

    # fetch tag and content-item ids.
    tag_ids = listify_data_arg('tag_ids')
    content_item_ids = listify_data_arg('content_item_ids')

    # a list of content items to apply impact tags to.
    if len(tag_ids):
        tags = Tag.query\
            .filter_by(org_id=org.id)\
            .filter(Tag.id.in_(tag_ids))\
            .all()
        if not len(tags):
            raise RequestError(
                'Tag(s) with ID(s) {} do(es) not exist.'.format(tag_ids))
        for tag in tags:
            # validate tag
            if tag.type != 'impact':
                raise RequestError('Events can only be assigned Impact Tags.')
            # add it
            if tag.id not in e.tag_ids:
                e.tags.append(tag)

    if len(content_item_ids):
        content_items = ContentItem.query\
            .filter_by(org_id=org.id)\
            .filter(ContentItem.id.in_(content_item_ids))\
            .all()
        if not len(content_items):
            # bug fix: report the content item ids (was tag_ids).
            raise RequestError(
                'ContentItem(s) with ID(s) {} do(es) not exist.'.format(
                    content_item_ids))
        # add content items
        for c in content_items:
            if c.id not in e.content_item_ids:
                e.content_items.append(c)

    # filter out any non-columns. iterate over a *copy* of the keys:
    # popping from a dict while iterating its live key view raises
    # RuntimeError on python 3.
    columns = get_table_columns(Event)
    for k in list(req_data.keys()):
        if k not in columns:
            req_data.pop(k)

    # update fields
    for k, v in req_data.items():
        setattr(e, k, v)

    # ensure no one sneakily/accidentally
    # updates an organization id
    e.org_id = org.id

    # commit changes
    db.session.add(e)
    db.session.commit()

    # return modified event
    return jsonify(e)
def ingest(
        obj,
        org_id,
        url_fields=['body'],
        requires=['url', 'type'],
        extract=True,
        kill_session=True):
    """
    Ingest a Content Item.

    Normalizes/validates the incoming dict, optionally runs cached
    article extraction on its url, splits relational fields (tags,
    authors) from column fields, then upserts a ContentItem keyed by
    (org_id, type, url) and associates tags and authors.

    :param obj: dict of content-item fields (mutated in place).
    :param org_id: fallback org id; an 'org_id' key in ``obj`` wins.
    :param url_fields: NOTE(review): currently unused in this body —
        the url-extraction code below is commented out.
    :param requires: keys that must be present in ``obj``.
    :param extract: when True, pull title/description/body from the
        extraction cache; when False, sanitize the provided fields.
    :param kill_session: close the session before returning.
    :raises RequestError: when extraction fails for the url.
    :return: the created or updated ContentItem.
    """
    # distinct session for this eventlet.
    session = gen_session()

    # check required fields
    ingest_util.check_requires(obj, requires, type='Content Item')

    # validate type
    validate_content_item_types(obj['type'])

    # check if the org_id is in the body
    # TODO: I don't think this is necessary.
    org_id = obj.pop('org_id', org_id)

    # get rid of ``id`` if it somehow got in here.
    obj.pop('id', None)

    # normalize the url
    obj['url'] = ingest_util.prepare_url(obj, 'url')

    # run article extraction.
    if extract:
        cache_response = extract_cache.get(url=obj['url'], type=obj['type'])
        if not cache_response:
            # make sure to kill this key so a bad result isn't reused.
            extract_cache.invalidate(url=obj['url'], type=obj['type'])
            raise RequestError(
                'Extraction failed on {type} - {url}'
                .format(**obj))
        # extraction succeeded
        else:
            data = cache_response.value
            obj.update(data)
    else:
        obj['title'] = ingest_util.prepare_str(obj, 'title')
        obj['description'] = ingest_util.prepare_str(obj, 'description')
        obj['body'] = ingest_util.prepare_str(obj, 'body')
        # NOTE(review): 'created' is run through prepare_str here,
        # while the event ingester uses prepare_date — confirm this
        # is intentional.
        obj['created'] = ingest_util.prepare_str(obj, 'created')
        if not obj['created']:
            obj.pop('created')

    # get thumbnail
    obj['thumbnail'] = ingest_util.prepare_thumbnail(obj, 'img_url')

    # split out tags_ids + authors + links.
    # note: 'authors' (names) are accepted alongside 'author_ids'.
    tag_ids = obj.pop('tag_ids', [])
    authors = obj.pop('author_ids', [])
    authors.extend(obj.pop('authors', []))  # accept names too
    # links = obj.pop('links', {})

    # determine content-item provenance
    obj = _content_item_provenance(obj, org_id)

    # split out meta fields
    obj = ingest_util.split_meta(obj, get_table_columns(ContentItem))

    # see if the content item already exists for (org, type, url).
    c = session.query(ContentItem)\
        .filter_by(org_id=org_id, type=obj['type'], url=obj['url'])\
        .first()

    # if not, create it
    if not c:
        # create content item
        c = ContentItem(org_id=org_id, **obj)

    # else, update it
    else:
        for k, v in obj.items():
            setattr(c, k, v)

    # extract urls and normalize urls asynchronously.
    # urls = ingest_util.extract_urls(
    #     obj,
    #     url_fields,
    #     source=data.get('url'),
    #     links=_links)

    # detect content_items
    # if len(_links):
    #     c = _associate_content_items(c, org_id, _links)

    # associate tags
    if len(tag_ids):
        c = _associate_tags(c, org_id, tag_ids, session)

    # associate authors (only append ones not already linked).
    if len(authors):
        _authors = _associate_authors(c, org_id, authors, session)
        for a in _authors:
            if a.id not in c.author_ids:
                c.authors.append(a)

    session.add(c)
    session.commit()
    if kill_session:
        session.close()
    return c
def ingest(obj, org_id, org_domains,
           url_fields=['title', 'body', 'description'],
           requires=['title'], must_link=False, kill_session=True):
    """
    Ingest an Event.

    Validates and sanitizes the incoming dict, splits out relational
    fields (tags, content items, links), resolves provenance, then
    upserts an Event keyed by (org_id, source_id) and associates
    content items and tags.

    :param obj: dict of event fields (mutated in place).
    :param org_id: fallback org id; an 'org_id' key in ``obj`` wins.
    :param org_domains: domains used to normalize/filter links.
    :param url_fields: NOTE(review): not referenced in this body —
        presumably consumed upstream; confirm.
    :param requires: keys that must be present in ``obj``.
    :param must_link: when True, drop (return None) events that do
        not link to any content items.
    :param kill_session: close the session before returning.
    :raises RequestError: for an explicit "deleted" status on create.
    :raises UnprocessableEntityError: if the event exists and was
        previously deleted.
    :return: the created/updated Event, or None when must_link filters
        it out.  NOTE(review): the early ``return None`` path skips
        session.close() even when kill_session is True — confirm.
    """
    # distinct session for this eventlet.
    session = gen_session()

    has_content_items = False

    # check required fields
    ingest_util.check_requires(obj, requires, type='Event')

    # validate status
    if 'status' in obj:
        validate_event_status(obj['status'])
        if obj['status'] == 'deleted':
            raise RequestError(
                'You cannot create an Event with status "deleted."')

    # check if the org_id is in the body
    # TODO: I don't think this is necessary.
    org_id = obj.pop('org_id', org_id)

    # get rid of ``id`` if it somehow got in here.
    obj.pop('id', None)

    # normalize the url
    obj['url'] = ingest_util.prepare_url(obj, 'url')

    # sanitize creation date; drop the key entirely when empty.
    obj['created'] = ingest_util.prepare_date(obj, 'created')
    if not obj['created']:
        obj.pop('created')

    # sanitize text/html fields
    obj['title'] = ingest_util.prepare_str(obj, 'title', obj['url'])
    obj['description'] = ingest_util.prepare_str(obj, 'description',
                                                 obj['url'])
    obj['body'] = ingest_util.prepare_str(obj, 'body', obj['url'])

    # get thumbnail
    obj['thumbnail'] = ingest_util.prepare_thumbnail(obj, 'img_url')

    # split out tags_ids + content_item_ids
    tag_ids = obj.pop('tag_ids', [])
    content_item_ids = obj.pop('content_item_ids', [])
    links = obj.pop('links', [])

    # determine event provenance
    obj = _event_provenance(obj, org_id, session)

    # split out meta fields
    obj = ingest_util.split_meta(obj, get_table_columns(Event))

    # see if the event already exists.
    e = session.query(Event)\
        .filter_by(org_id=org_id)\
        .filter_by(source_id=obj['source_id'])\
        .first()

    # if not, create it
    if not e:
        # create event
        e = Event(org_id=org_id, **obj)

    # else, update it
    else:
        # if it's deleted, issue a message.
        if e.status == 'deleted':
            raise UnprocessableEntityError(
                'Event {} already exists and has been previously deleted.'.
                format(e.id))
        for k, v in obj.items():
            setattr(e, k, v)

    # extract urls and normalize urls asynchronously.
    links = ingest_util.prepare_links(links, org_domains)

    # detect content_items
    if len(links):
        e, has_content_items = _associate_content_items(
            e, org_id, links, content_item_ids, session)

    # associate tags
    if len(tag_ids):
        e = _associate_tags(e, org_id, tag_ids, session)

    # dont commit event if we're only looking
    # for events that link to content_items
    if not has_content_items and must_link:
        return None

    session.add(e)
    session.commit()
    if kill_session:
        session.close()
    return e