def update_automatic_subitem_matches_by_index(index, item, data):
    # Register automatic matches for all subitems referenced by this item's data.
    # Returns True if any match was registered, False if none were, and None
    # (implicitly) when `index` has no entry in class_map.
    if index in class_map:
        data = check_data_format(data)
        matches = get_automatic_subitem_matches_by_index(index, data)
        # Sentinel "ip" marking matches created internally by this indexer,
        # used below to detect already-registered automatic matches.
        fakeip = 'internal, matched by index {}'.format(index)
        # Tie-break ordering of match types when linked-item counts are equal.
        order = ['work', 'recording', 'label', 'artist', 'release', 'release_group']
        changed = False
        for (subitem_id, subitem_matches) in matches.iteritems():
            # NOTE: `data` is rebound here to the subitem's document source,
            # shadowing the item-level data passed in.
            try:
                document = es.get(index, 'subitem', subitem_id)
                data = document['_source']
            except ElasticHttpNotFoundError:
                # Subitem document doesn't exist yet; start from an empty one.
                data = {}
            data = check_data_format(data)
            automatches = data['_geordi']['matchings']['auto_matchings']
            # Do matches with more linked items first, then supersede with fewer-ID matches
            for (matchtype, mbids) in sorted(
                    subitem_matches.iteritems(),
                    key=lambda x: (len(x[1]),
                                   order.index(x[0]) if x[0] in order else 999),
                    reverse=True):
                # Skip only when this fakeip already appears among the auto
                # matches AND an identical sorted MBID set is already present.
                # NOTE(review): the two membership tests may hit different
                # match entries, so this is a loose dedup — confirm intended.
                if (
                    fakeip not in [match.get('ip') for match in automatches]
                    or ",".join(sorted(mbids)) not in
                        [",".join(sorted(match.get('mbid', [])))
                         for match in automatches]
                ):
                    register_match(index, subitem_id, 'subitem', matchtype,
                                   mbids, auto=True, user='******', ip=fakeip)
                    changed = True
                else:
                    continue
        return changed
def get_subitem(index, subitem, create=False, seed=None):
    """Fetch a subitem document, optionally merging `seed` data into it.

    Returns the (possibly re-fetched) document on success, or None when the
    document is missing (after creating it from `seed` if `create` is True).

    Fix: the original used a mutable default argument (`seed={}`).  The seed
    dict is passed to check_data_format() and merged/mutated below, so a
    shared default dict would accumulate state across calls.
    """
    if seed is None:
        seed = {}
    try:
        document = es.get(index, 'subitem', subitem)
        if seed == {}:
            # Nothing to merge; return the stored document as-is.
            return document
        else:
            data = document['_source']
            data = check_data_format(data)
            changed = False
            for key in seed.keys():
                if key not in data:
                    data[key] = seed[key]
                    changed = True
                elif key in data:
                    # NOTE(review): `changed` is set even when the seed value
                    # is already present, forcing a reindex — confirm intended.
                    changed = True
                    try:
                        # Append only if no existing element stringifies equal.
                        if unicode(seed[key]) not in [unicode(i) for i in data[key]]:
                            data[key].append(seed[key])
                            changed = True
                    except (AttributeError, TypeError):
                        # Existing value isn't a list; coerce both into one.
                        data[key] = list(flatten([data[key], seed[key]]))
                        changed = True
                # Deduplicate list-like values (but never split strings).
                if isinstance(data[key], collections.Iterable) and not isinstance(data[key], basestring):
                    data[key] = list(set(flatten(data[key])))
            if changed:
                es.index(index, 'subitem', data, id=subitem)
                document = es.get(index, 'subitem', subitem)
            return document
    except ElasticHttpNotFoundError:
        if create:
            data = check_data_format(seed)
            es.index(index, 'subitem', data, id=subitem)
        return None
def update_automatic_item_matches_by_index(index, item, data):
    # Register automatic matches for a single item.  Returns True if any match
    # was registered, False if none were, and None (implicitly) when `index`
    # has no entry in class_map.
    if index in class_map:
        data = check_data_format(data)
        matches = get_automatic_item_matches_by_index(index, data)
        # Sentinel "ip" marking matches created internally by this indexer.
        fakeip = 'internal, matched by index {}'.format(index)
        automatches = data['_geordi']['matchings']['auto_matchings']
        changed = False
        # Tie-break ordering of match types when linked-item counts are equal.
        order = [
            'work', 'recording', 'label', 'artist', 'release', 'release_group'
        ]
        # Do matches with more linked items first, then supersede with fewer-ID matches
        for (matchtype, mbids) in sorted(
                matches.iteritems(),
                key=lambda x: (len(x[1]),
                               order.index(x[0]) if x[0] in order else 999),
                reverse=True):
            # Skip only when this fakeip already appears among the auto matches
            # AND an identical sorted MBID set is already present.
            # NOTE(review): the two membership tests may hit different match
            # entries, so this is a loose dedup — confirm intended.
            if (fakeip not in [match.get('ip') for match in automatches]
                    or ",".join(sorted(mbids)) not in [
                        ",".join(sorted(match.get('mbid', [])))
                        for match in automatches
                    ]):
                register_match(index, item, 'item', matchtype, mbids,
                               auto=True, user='******', ip=fakeip)
                changed = True
            else:
                continue
        return changed
def update_automatic_subitem_matches_by_index(index, item, data):
    """Run the index's automatic subitem matchers and register each result.

    Returns a truthy value when any subitem's matches changed, False when
    none did, and None when `index` is not a known class_map index.
    """
    if index not in class_map:
        return None
    parent = check_data_format(data)
    found = get_index(index).automatic_subitem_matches(parent)
    overall = False
    for subitem_id, subitem_matches in found.iteritems():
        # Load the subitem's stored document, falling back to a blank one.
        try:
            subitem_data = es.get(index, "subitem", subitem_id)["_source"]
        except ElasticHttpNotFoundError:
            subitem_data = {}
        subitem_data = check_data_format(subitem_data)
        result = register_matches(
            index, "subitem", subitem_id, subitem_matches,
            subitem_data["_geordi"]["matchings"]["auto_matchings"])
        # Keep the first truthy result, mirroring the original accumulation.
        overall = overall or result
    return overall
def update_automatic_subitem_matches_by_index(index, item, data):
    """Register automatic matches for every subitem produced by this index.

    Returns a truthy value if any registration reported a change, False
    otherwise; indices absent from class_map yield None.
    """
    if index not in class_map:
        return None
    item_data = check_data_format(data)
    all_matches = get_index(index).automatic_subitem_matches(item_data)
    any_changed = False
    for sub_id, sub_matches in all_matches.iteritems():
        try:
            doc = es.get(index, 'subitem', sub_id)
            sub_source = doc['_source']
        except ElasticHttpNotFoundError:
            # No stored subitem yet — start from scratch.
            sub_source = {}
        sub_source = check_data_format(sub_source)
        existing_auto = sub_source['_geordi']['matchings']['auto_matchings']
        outcome = register_matches(index, 'subitem', sub_id, sub_matches,
                                   existing_auto)
        if not any_changed:
            any_changed = outcome
    return any_changed
def get_subitem(index, subitem, create=False, seed=None):
    """Return the subitem document, merging in optional `seed` data first.

    On a missing document: creates it from `seed` when `create` is True;
    returns None either way.

    Fix: replaced the mutable default `seed={}` with None.  The dict is
    mutated (by check_data_format and the merge loop), so a shared module-
    level default would leak state between calls.
    """
    if seed is None:
        seed = {}
    try:
        document = es.get(index, 'subitem', subitem)
        if seed == {}:
            return document
        else:
            data = document['_source']
            data = check_data_format(data)
            changed = False
            for key in seed.keys():
                if key not in data:
                    data[key] = seed[key]
                    changed = True
                elif key in data:
                    # NOTE(review): marks changed even when nothing is
                    # appended, so every seeded call reindexes — confirm.
                    changed = True
                    try:
                        if unicode(seed[key]) not in [
                            unicode(i) for i in data[key]
                        ]:
                            data[key].append(seed[key])
                            changed = True
                    except (AttributeError, TypeError):
                        # Value wasn't list-like; merge both into a flat list.
                        data[key] = list(flatten([data[key], seed[key]]))
                        changed = True
                # Deduplicate any non-string iterable value.
                if isinstance(data[key], collections.Iterable) and not isinstance(
                        data[key], basestring):
                    data[key] = list(set(flatten(data[key])))
            if changed:
                es.index(index, 'subitem', data, id=subitem)
                document = es.get(index, 'subitem', subitem)
            return document
    except ElasticHttpNotFoundError:
        if create:
            data = check_data_format(seed)
            es.index(index, 'subitem', data, id=subitem)
        return None
def update_linked_by_index(index, item, data):
    """Recompute an item's extracted links and store them if they changed.

    Returns True when the document was reindexed with new links, False when
    the links were unchanged, and None on a missing document, an ES write
    failure, or (implicitly) an unknown index.

    Fix: narrowed two bare `except:` clauses to `except Exception:` so
    SystemExit/KeyboardInterrupt are no longer swallowed.
    """
    if index in class_map:
        try:
            document = es.get(index, "item", item)
        except ElasticHttpNotFoundError:
            return None
        data = document["_source"]
        version = document["_version"]
        data = check_data_format(data)
        links = class_map[index].extract_linked(data)
        currentlinks = data["_geordi"]["links"]["links"]
        same = True
        try:
            if currentlinks["version"] != links["version"]:
                same = False
            elif len(links.keys()) != len(currentlinks.keys()):
                same = False
            else:
                for category in class_map[index].link_types().keys():
                    if len(links[category]) != len(currentlinks[category]):
                        same = False
                    elif links[category] != currentlinks[category]:
                        same = False
                    if not same:
                        break
        except Exception:
            # Any structural mismatch (missing key, wrong shape) counts as
            # "changed" — deliberate best-effort comparison.
            same = False
        if not same:
            data["_geordi"]["links"]["links"] = links
            try:
                # es_version makes the write conditional on the version we read.
                es.index(index, "item", data, id=item, es_version=version)
                return True
            except Exception:
                # Write failed (e.g. version conflict); signal with None.
                return None
        else:
            return False
def update_linked_by_index(index, item, data):
    """Refresh the stored link extraction for an item when it differs.

    Returns True on a successful reindex, False when nothing changed, None
    when the document is missing or the write fails (or the index is not in
    class_map, implicitly).

    Fix: the two bare `except:` clauses are narrowed to `except Exception:`
    so interpreter-exit exceptions propagate.
    """
    if index in class_map:
        try:
            document = es.get(index, 'item', item)
        except ElasticHttpNotFoundError:
            return None
        data = document['_source']
        version = document['_version']
        data = check_data_format(data)
        links = class_map[index].extract_linked(data)
        currentlinks = data['_geordi']['links']['links']
        same = True
        try:
            if currentlinks['version'] != links['version']:
                same = False
            elif len(links.keys()) != len(currentlinks.keys()):
                same = False
            else:
                for category in class_map[index].link_types().keys():
                    if len(links[category]) != len(currentlinks[category]):
                        same = False
                    elif links[category] != currentlinks[category]:
                        same = False
                    if not same:
                        break
        except Exception:
            # Best-effort comparison: any shape/key mismatch means "changed".
            same = False
        if not same:
            data['_geordi']['links']['links'] = links
            try:
                # Optimistic concurrency via the version read above.
                es.index(index, 'item', data, id=item, es_version=version)
                return True
            except Exception:
                return None
        else:
            return False
def update_linked_by_index(index, item, data):
    """Store freshly extracted links for an item when they differ from the
    currently stored ones.

    Returns True on reindex, False when unchanged, None on missing document
    or write failure (or unknown index, implicitly).

    Fix: bare `except:` clauses narrowed to `except Exception:`.
    """
    if index in class_map:
        try:
            document = es.get(index, 'item', item)
        except ElasticHttpNotFoundError:
            return None
        data = document['_source']
        version = document['_version']
        data = check_data_format(data)
        links = class_map[index].extract_linked(data)
        currentlinks = data['_geordi']['links']['links']
        same = True
        try:
            # Quick rejections: version mismatch, differing key counts, or
            # differing per-category lengths.
            # NOTE(review): the length-list comparison depends on both dicts
            # iterating in the same key order — confirm this holds.
            if (currentlinks['version'] != links['version']
                    or len(links.keys()) != len(currentlinks.keys())
                    or [len(link[1]) for link in links.iteritems()
                        if link[0] != 'version']
                    != [len(link[1]) for link in currentlinks.iteritems()
                        if link[0] != 'version']):
                same = False
            else:
                for category in class_map[index].link_types().keys():
                    if links[category] != currentlinks[category]:
                        same = False
        except Exception:
            # Structural mismatch counts as "changed" (best-effort compare).
            same = False
        if not same:
            data['_geordi']['links']['links'] = links
            try:
                es.index(index, 'item', data, id=item, es_version=version)
                return True
            except Exception:
                return None
        else:
            return False
def update_map_by_index(index, item, data):
    """Recompute an item's mapping and store it when its version changed.

    Returns True on reindex, False when the mapping version is unchanged,
    None on a missing document or failed write (or unknown index, implicitly).

    Fixes: `not a == b` rewritten as the idiomatic `a != b`; bare `except:`
    narrowed to `except Exception:`.
    """
    if index in class_map:
        try:
            document = es.get(index, 'item', item)
        except ElasticHttpNotFoundError:
            return None
        data = document['_source']
        version = document['_version']
        data = check_data_format(data)
        currentmapping = data['_geordi']['mapping']
        mapping = get_map_by_index(index, data)
        if currentmapping['version'] != mapping['version']:
            data['_geordi']['mapping'] = mapping
            try:
                # Conditional write keyed on the version we read above.
                es.index(index, 'item', data, id=item, es_version=version)
                return True
            except Exception:
                return None
        else:
            return False
def update_map_by_index(index, item, data):
    """Regenerate the mapping via the index class and persist it on a
    version change.

    Returns True on reindex, False when unchanged, None on missing document
    or failed write (or unknown index, implicitly).

    Fixes: `not a == b` → `a != b`; bare `except:` → `except Exception:`.
    """
    if index in class_map:
        try:
            document = es.get(index, "item", item)
        except ElasticHttpNotFoundError:
            return None
        data = document["_source"]
        version = document["_version"]
        data = check_data_format(data)
        currentmapping = data["_geordi"]["mapping"]
        mapping = get_index(index).map(data)
        if currentmapping["version"] != mapping["version"]:
            data["_geordi"]["mapping"] = mapping
            try:
                es.index(index, "item", data, id=item, es_version=version)
                return True
            except Exception:
                return None
        else:
            return False
def update_automatic_item_matches_by_index(index, item, data):
    """Run the index's automatic item matchers and register any results.

    Returns register_matches()'s result, or None for an unknown index.
    """
    if index not in class_map:
        return None
    formatted = check_data_format(data)
    found = get_index(index).automatic_item_matches(formatted)
    current_auto = formatted["_geordi"]["matchings"]["auto_matchings"]
    return register_matches(index, "item", item, found, current_auto)
def update_individual_subitem_matches_by_index(index, subitem, data):
    """Register automatic matches computed for a single subitem.

    Returns register_matches()'s result, or None for an unknown index.
    """
    if index not in class_map:
        return None
    subitem_data = check_data_format(data)
    found = get_index(index).individual_subitem_matches(subitem, subitem_data)
    current_auto = subitem_data['_geordi']['matchings']['auto_matchings']
    return register_matches(index, 'subitem', subitem, found, current_auto)
def update_individual_subitem_matches_by_index(index, subitem, data):
    """Compute and register the automatic matches for one subitem document.

    Returns whatever register_matches() reports; unknown indices yield None.
    """
    if index not in class_map:
        return None
    normalized = check_data_format(data)
    matched = get_index(index).individual_subitem_matches(subitem, normalized)
    auto_existing = normalized["_geordi"]["matchings"]["auto_matchings"]
    return register_matches(index, "subitem", subitem, matched, auto_existing)
def register_match(index, item, itemtype, matchtype, mbids, auto=False, user=None, ip=False):
    """Record a manual or automatic match of an item/subitem to MBIDs.

    Validates the MBIDs (unless unmatching), loads the target document,
    appends a match definition, and reindexes.  Always returns a Flask
    Response (JSON body plus a permissive CORS header).

    Fixes: the nine duplicated error-response stanzas are factored into a
    local helper; the final bare `except:` is narrowed to `except Exception:`.
    (The `return`/`response` statement in the auto/no-user branch is
    reconstructed as `return error_response`, matching every other branch.)
    """
    def _json_response(payload, status):
        # Shared JSON + CORS response construction used by every exit path.
        resp = Response(json.dumps(payload), status, mimetype="application/json")
        resp.headers.add('Access-Control-Allow-Origin', '*')
        return resp

    def _error(status, message):
        return _json_response({'code': status, 'error': message}, status)

    if matchtype != 'unmatch':
        if len(mbids) < 1:
            return _error(400, 'You must provide at least one MBID for a match.')
        # Check MBID formatting
        try:
            [uuid.UUID('{{{uuid}}}'.format(uuid=mbid)) for mbid in mbids]
        except ValueError:
            return _error(400, 'A provided MBID is ill-formed')
        # Verify each MBID exists in MusicBrainz and is of the claimed type.
        for mbid in mbids:
            check = check_type(mbid)
            if 'error' in check:
                return _error(400, 'MBID {} cannot be found in MusicBrainz'.format(mbid))
            elif check['type'] != matchtype:
                return _error(400, 'Provided match type {provided} doesn\'t match type {mbidtype} of {mbid}'.format(provided=matchtype, mbidtype=check['type'], mbid=mbid))
            else:
                continue
    # Retrieve document (or blank empty document for subitems)
    try:
        document = es.get(index, itemtype, item)
        data = document['_source']
        version = document['_version']
    except ElasticHttpNotFoundError:
        if itemtype == 'item':
            return _error(404, 'The provided item could not be found.')
        else:
            data = {}
            version = None
    data = check_data_format(data)
    if auto:
        if not user:
            return _error(400, 'Automatic matches must provide a name.')
        if not ip:
            # Prefer the forwarded-for chain's last hop; fall back to the
            # direct peer address.
            try:
                ip = request.environ['HTTP_X_FORWARDED_FOR'].split(',')[-1].strip()
            except KeyError:
                ip = request.environ['REMOTE_ADDR']
    else:
        user = current_user.id
        ip = False
    match = make_match_definition(user, matchtype, mbids, auto, ip)
    # Manual matches always become current; automatic matches only when no
    # manual match exists or the current one is itself automatic.
    if (
        (not auto
         or len(data['_geordi']['matchings']['matchings']) == 0
         or data['_geordi']['matchings']['current_matching']['auto'])
        and matchtype != 'unmatch'
    ):
        data['_geordi']['matchings']['current_matching'] = match
    if not auto:
        data['_geordi']['matchings']['matchings'].append(match)
    else:
        data['_geordi']['matchings']['auto_matchings'].append(match)
    if matchtype == 'unmatch':
        data['_geordi']['matchings']['current_matching'] = {}
    try:
        if version:
            # Conditional write against the version we read.
            es.index(index, itemtype, data, id=item, es_version=version)
        else:
            es.index(index, itemtype, data, id=item)
        return _json_response({'code': 200}, 200)
    except Exception:
        return _error(500, 'An unknown error happened while pushing to elasticsearch.')
def update_automatic_item_matches_by_index(index, item, data):
    """Compute this index's automatic matches for an item and register them.

    Returns register_matches()'s result; unknown indices yield None.
    """
    if index not in class_map:
        return None
    normalized = check_data_format(data)
    discovered = get_index(index).automatic_item_matches(normalized)
    auto_existing = normalized['_geordi']['matchings']['auto_matchings']
    return register_matches(index, 'item', item, discovered, auto_existing)
def register_match(index, item, itemtype, matchtype, mbids, auto=False, user=None, ip=False):
    """Register a match (manual or automatic) between an item/subitem and
    one or more MusicBrainz MBIDs, or clear the current match ('unmatch').

    Always returns a Flask JSON Response carrying a CORS header.

    Fixes: repeated Response-building boilerplate extracted into a nested
    helper; the trailing bare `except:` narrowed to `except Exception:`.
    (The `return`/`response` split in the auto/no-user branch is
    reconstructed as returning the error response, as all sibling branches do.)
    """
    def _respond(payload, status):
        # Every exit path emits JSON with Access-Control-Allow-Origin: *.
        r = Response(json.dumps(payload), status, mimetype="application/json")
        r.headers.add('Access-Control-Allow-Origin', '*')
        return r

    def _fail(status, message):
        return _respond({'code': status, 'error': message}, status)

    if matchtype != 'unmatch':
        if len(mbids) < 1:
            return _fail(400, 'You must provide at least one MBID for a match.')
        # Check MBID formatting
        try:
            [uuid.UUID('{{{uuid}}}'.format(uuid=mbid)) for mbid in mbids]
        except ValueError:
            return _fail(400, 'A provided MBID is ill-formed')
        # Each MBID must resolve in MusicBrainz and be of the claimed type.
        for mbid in mbids:
            check = check_type(mbid)
            if 'error' in check:
                return _fail(400, 'MBID {} cannot be found in MusicBrainz'.format(mbid))
            elif check['type'] != matchtype:
                return _fail(400, 'Provided match type {provided} doesn\'t match type {mbidtype} of {mbid}'.format(provided=matchtype, mbidtype=check['type'], mbid=mbid))
            else:
                continue
    # Retrieve document (or blank empty document for subitems)
    try:
        document = es.get(index, itemtype, item)
        data = document['_source']
        version = document['_version']
    except ElasticHttpNotFoundError:
        if itemtype == 'item':
            return _fail(404, 'The provided item could not be found.')
        else:
            data = {}
            version = None
    data = check_data_format(data)
    if auto:
        if not user:
            return _fail(400, 'Automatic matches must provide a name.')
        if not ip:
            # Last hop of the X-Forwarded-For chain, else the peer address.
            try:
                ip = request.environ['HTTP_X_FORWARDED_FOR'].split(',')[-1].strip()
            except KeyError:
                ip = request.environ['REMOTE_ADDR']
    else:
        user = current_user.id
        ip = False
    match = make_match_definition(user, matchtype, mbids, auto, ip)
    # Promote to current_matching unless a manual match already holds it.
    if ((not auto
         or len(data['_geordi']['matchings']['matchings']) == 0
         or data['_geordi']['matchings']['current_matching']['auto'])
            and matchtype != 'unmatch'):
        data['_geordi']['matchings']['current_matching'] = match
    if not auto:
        data['_geordi']['matchings']['matchings'].append(match)
    else:
        data['_geordi']['matchings']['auto_matchings'].append(match)
    if matchtype == 'unmatch':
        data['_geordi']['matchings']['current_matching'] = {}
    try:
        if version:
            es.index(index, itemtype, data, id=item, es_version=version)
        else:
            es.index(index, itemtype, data, id=item)
        return _respond({'code': 200}, 200)
    except Exception:
        return _fail(500, 'An unknown error happened while pushing to elasticsearch.')