def update_automatic_subitem_matches_by_index(index, item, data):
    # Register automatic matches for all subitems referenced by this item's data.
    # Returns True if any match was registered, False if none were, and None
    # (implicitly) when `index` has no entry in class_map.
    if index in class_map:
        data = check_data_format(data)
        matches = get_automatic_subitem_matches_by_index(index, data)
        # Sentinel "ip" marking matches created internally by this indexer,
        # used below to detect already-registered automatic matches.
        fakeip = 'internal, matched by index {}'.format(index)
        # Tie-break ordering of match types when linked-item counts are equal.
        order = ['work', 'recording', 'label', 'artist', 'release', 'release_group']
        changed = False
        for (subitem_id, subitem_matches) in matches.iteritems():
            # NOTE: `data` is rebound here to the subitem's document source,
            # shadowing the item-level data passed in.
            try:
                document = es.get(index, 'subitem', subitem_id)
                data = document['_source']
            except ElasticHttpNotFoundError:
                # Subitem document doesn't exist yet; start from an empty one.
                data = {}
            data = check_data_format(data)
            automatches = data['_geordi']['matchings']['auto_matchings']
            # Do matches with more linked items first, then supersede with fewer-ID matches
            for (matchtype, mbids) in sorted(
                    subitem_matches.iteritems(),
                    key=lambda x: (len(x[1]),
                                   order.index(x[0]) if x[0] in order else 999),
                    reverse=True):
                # Skip only when this fakeip already appears among the auto
                # matches AND an identical sorted MBID set is already present.
                # NOTE(review): the two membership tests may hit different
                # match entries, so this is a loose dedup — confirm intended.
                if (
                    fakeip not in [match.get('ip') for match in automatches]
                    or ",".join(sorted(mbids)) not in
                        [",".join(sorted(match.get('mbid', [])))
                         for match in automatches]
                ):
                    register_match(index, subitem_id, 'subitem', matchtype,
                                   mbids, auto=True, user='******', ip=fakeip)
                    changed = True
                else:
                    continue
        return changed
def get_subitem(index, subitem, create=False, seed=None):
    """Fetch a subitem document, optionally merging `seed` data into it.

    Returns the (possibly re-fetched) document on success, or None when the
    document is missing (after creating it from `seed` if `create` is True).

    Fix: the original used a mutable default argument (`seed={}`).  The seed
    dict is passed to check_data_format() and merged/mutated below, so a
    shared default dict would accumulate state across calls.
    """
    if seed is None:
        seed = {}
    try:
        document = es.get(index, 'subitem', subitem)
        if seed == {}:
            # Nothing to merge; return the stored document as-is.
            return document
        else:
            data = document['_source']
            data = check_data_format(data)
            changed = False
            for key in seed.keys():
                if key not in data:
                    data[key] = seed[key]
                    changed = True
                elif key in data:
                    # NOTE(review): `changed` is set even when the seed value
                    # is already present, forcing a reindex — confirm intended.
                    changed = True
                    try:
                        # Append only if no existing element stringifies equal.
                        if unicode(seed[key]) not in [unicode(i) for i in data[key]]:
                            data[key].append(seed[key])
                            changed = True
                    except (AttributeError, TypeError):
                        # Existing value isn't a list; coerce both into one.
                        data[key] = list(flatten([data[key], seed[key]]))
                        changed = True
                # Deduplicate list-like values (but never split strings).
                if isinstance(data[key], collections.Iterable) and not isinstance(data[key], basestring):
                    data[key] = list(set(flatten(data[key])))
            if changed:
                es.index(index, 'subitem', data, id=subitem)
                document = es.get(index, 'subitem', subitem)
            return document
    except ElasticHttpNotFoundError:
        if create:
            data = check_data_format(seed)
            es.index(index, 'subitem', data, id=subitem)
        return None
def update_automatic_item_matches_by_index(index, item, data):
    # Register automatic matches for a single item.  Returns True if any match
    # was registered, False if none were, and None (implicitly) when `index`
    # has no entry in class_map.
    if index in class_map:
        data = check_data_format(data)
        matches = get_automatic_item_matches_by_index(index, data)
        # Sentinel "ip" marking matches created internally by this indexer.
        fakeip = 'internal, matched by index {}'.format(index)
        automatches = data['_geordi']['matchings']['auto_matchings']
        changed = False
        # Tie-break ordering of match types when linked-item counts are equal.
        order = [
            'work', 'recording', 'label', 'artist', 'release', 'release_group'
        ]
        # Do matches with more linked items first, then supersede with fewer-ID matches
        for (matchtype, mbids) in sorted(
                matches.iteritems(),
                key=lambda x: (len(x[1]),
                               order.index(x[0]) if x[0] in order else 999),
                reverse=True):
            # Skip only when this fakeip already appears among the auto matches
            # AND an identical sorted MBID set is already present.
            # NOTE(review): the two membership tests may hit different match
            # entries, so this is a loose dedup — confirm intended.
            if (fakeip not in [match.get('ip') for match in automatches]
                    or ",".join(sorted(mbids)) not in [
                        ",".join(sorted(match.get('mbid', [])))
                        for match in automatches
                    ]):
                register_match(index, item, 'item', matchtype, mbids,
                               auto=True, user='******', ip=fakeip)
                changed = True
            else:
                continue
        return changed
def update_automatic_subitem_matches_by_index(index, item, data):
    """Run the index's automatic subitem matchers and register each result.

    Returns a truthy value when any subitem's matches changed, False when
    none did, and None when `index` is not a known class_map index.
    """
    if index not in class_map:
        return None
    parent = check_data_format(data)
    found = get_index(index).automatic_subitem_matches(parent)
    overall = False
    for subitem_id, subitem_matches in found.iteritems():
        # Load the subitem's stored document, falling back to a blank one.
        try:
            subitem_data = es.get(index, "subitem", subitem_id)["_source"]
        except ElasticHttpNotFoundError:
            subitem_data = {}
        subitem_data = check_data_format(subitem_data)
        result = register_matches(
            index, "subitem", subitem_id, subitem_matches,
            subitem_data["_geordi"]["matchings"]["auto_matchings"])
        # Keep the first truthy result, mirroring the original accumulation.
        overall = overall or result
    return overall
def update_automatic_subitem_matches_by_index(index, item, data):
    """Register automatic matches for every subitem produced by this index.

    Returns a truthy value if any registration reported a change, False
    otherwise; indices absent from class_map yield None.
    """
    if index not in class_map:
        return None
    item_data = check_data_format(data)
    all_matches = get_index(index).automatic_subitem_matches(item_data)
    any_changed = False
    for sub_id, sub_matches in all_matches.iteritems():
        try:
            doc = es.get(index, 'subitem', sub_id)
            sub_source = doc['_source']
        except ElasticHttpNotFoundError:
            # No stored subitem yet — start from scratch.
            sub_source = {}
        sub_source = check_data_format(sub_source)
        existing_auto = sub_source['_geordi']['matchings']['auto_matchings']
        outcome = register_matches(index, 'subitem', sub_id, sub_matches,
                                   existing_auto)
        if not any_changed:
            any_changed = outcome
    return any_changed
def get_subitem(index, subitem, create=False, seed=None):
    """Return the subitem document, merging in optional `seed` data first.

    On a missing document: creates it from `seed` when `create` is True;
    returns None either way.

    Fix: replaced the mutable default `seed={}` with None.  The dict is
    mutated (by check_data_format and the merge loop), so a shared module-
    level default would leak state between calls.
    """
    if seed is None:
        seed = {}
    try:
        document = es.get(index, 'subitem', subitem)
        if seed == {}:
            return document
        else:
            data = document['_source']
            data = check_data_format(data)
            changed = False
            for key in seed.keys():
                if key not in data:
                    data[key] = seed[key]
                    changed = True
                elif key in data:
                    # NOTE(review): marks changed even when nothing is
                    # appended, so every seeded call reindexes — confirm.
                    changed = True
                    try:
                        if unicode(seed[key]) not in [
                            unicode(i) for i in data[key]
                        ]:
                            data[key].append(seed[key])
                            changed = True
                    except (AttributeError, TypeError):
                        # Value wasn't list-like; merge both into a flat list.
                        data[key] = list(flatten([data[key], seed[key]]))
                        changed = True
                # Deduplicate any non-string iterable value.
                if isinstance(data[key], collections.Iterable) and not isinstance(
                        data[key], basestring):
                    data[key] = list(set(flatten(data[key])))
            if changed:
                es.index(index, 'subitem', data, id=subitem)
                document = es.get(index, 'subitem', subitem)
            return document
    except ElasticHttpNotFoundError:
        if create:
            data = check_data_format(seed)
            es.index(index, 'subitem', data, id=subitem)
        return None
def update_linked_by_index(index, item, data):
    """Recompute an item's extracted links and store them if they changed.

    Returns True when the document was reindexed with new links, False when
    the links were unchanged, and None on a missing document, an ES write
    failure, or (implicitly) an unknown index.

    Fix: narrowed two bare `except:` clauses to `except Exception:` so
    SystemExit/KeyboardInterrupt are no longer swallowed.
    """
    if index in class_map:
        try:
            document = es.get(index, "item", item)
        except ElasticHttpNotFoundError:
            return None
        data = document["_source"]
        version = document["_version"]
        data = check_data_format(data)
        links = class_map[index].extract_linked(data)
        currentlinks = data["_geordi"]["links"]["links"]
        same = True
        try:
            if currentlinks["version"] != links["version"]:
                same = False
            elif len(links.keys()) != len(currentlinks.keys()):
                same = False
            else:
                for category in class_map[index].link_types().keys():
                    if len(links[category]) != len(currentlinks[category]):
                        same = False
                    elif links[category] != currentlinks[category]:
                        same = False
                    if not same:
                        break
        except Exception:
            # Any structural mismatch (missing key, wrong shape) counts as
            # "changed" — deliberate best-effort comparison.
            same = False
        if not same:
            data["_geordi"]["links"]["links"] = links
            try:
                # es_version makes the write conditional on the version we read.
                es.index(index, "item", data, id=item, es_version=version)
                return True
            except Exception:
                # Write failed (e.g. version conflict); signal with None.
                return None
        else:
            return False
def update_linked_by_index(index, item, data):
    """Refresh the stored link extraction for an item when it differs.

    Returns True on a successful reindex, False when nothing changed, None
    when the document is missing or the write fails (or the index is not in
    class_map, implicitly).

    Fix: the two bare `except:` clauses are narrowed to `except Exception:`
    so interpreter-exit exceptions propagate.
    """
    if index in class_map:
        try:
            document = es.get(index, 'item', item)
        except ElasticHttpNotFoundError:
            return None
        data = document['_source']
        version = document['_version']
        data = check_data_format(data)
        links = class_map[index].extract_linked(data)
        currentlinks = data['_geordi']['links']['links']
        same = True
        try:
            if currentlinks['version'] != links['version']:
                same = False
            elif len(links.keys()) != len(currentlinks.keys()):
                same = False
            else:
                for category in class_map[index].link_types().keys():
                    if len(links[category]) != len(currentlinks[category]):
                        same = False
                    elif links[category] != currentlinks[category]:
                        same = False
                    if not same:
                        break
        except Exception:
            # Best-effort comparison: any shape/key mismatch means "changed".
            same = False
        if not same:
            data['_geordi']['links']['links'] = links
            try:
                # Optimistic concurrency via the version read above.
                es.index(index, 'item', data, id=item, es_version=version)
                return True
            except Exception:
                return None
        else:
            return False
def update_linked_by_index(index, item, data):
    """Store freshly extracted links for an item when they differ from the
    currently stored ones.

    Returns True on reindex, False when unchanged, None on missing document
    or write failure (or unknown index, implicitly).

    Fix: bare `except:` clauses narrowed to `except Exception:`.
    """
    if index in class_map:
        try:
            document = es.get(index, 'item', item)
        except ElasticHttpNotFoundError:
            return None
        data = document['_source']
        version = document['_version']
        data = check_data_format(data)
        links = class_map[index].extract_linked(data)
        currentlinks = data['_geordi']['links']['links']
        same = True
        try:
            # Quick rejections: version mismatch, differing key counts, or
            # differing per-category lengths.
            # NOTE(review): the length-list comparison depends on both dicts
            # iterating in the same key order — confirm this holds.
            if (currentlinks['version'] != links['version']
                    or len(links.keys()) != len(currentlinks.keys())
                    or [len(link[1]) for link in links.iteritems()
                        if link[0] != 'version']
                    != [len(link[1]) for link in currentlinks.iteritems()
                        if link[0] != 'version']):
                same = False
            else:
                for category in class_map[index].link_types().keys():
                    if links[category] != currentlinks[category]:
                        same = False
        except Exception:
            # Structural mismatch counts as "changed" (best-effort compare).
            same = False
        if not same:
            data['_geordi']['links']['links'] = links
            try:
                es.index(index, 'item', data, id=item, es_version=version)
                return True
            except Exception:
                return None
        else:
            return False
def update_map_by_index(index, item, data):
    """Recompute an item's mapping and store it when its version changed.

    Returns True on reindex, False when the mapping version is unchanged,
    None on a missing document or failed write (or unknown index, implicitly).

    Fixes: `not a == b` rewritten as the idiomatic `a != b`; bare `except:`
    narrowed to `except Exception:`.
    """
    if index in class_map:
        try:
            document = es.get(index, 'item', item)
        except ElasticHttpNotFoundError:
            return None
        data = document['_source']
        version = document['_version']
        data = check_data_format(data)
        currentmapping = data['_geordi']['mapping']
        mapping = get_map_by_index(index, data)
        if currentmapping['version'] != mapping['version']:
            data['_geordi']['mapping'] = mapping
            try:
                # Conditional write keyed on the version we read above.
                es.index(index, 'item', data, id=item, es_version=version)
                return True
            except Exception:
                return None
        else:
            return False
def update_map_by_index(index, item, data):
    """Regenerate the mapping via the index class and persist it on a
    version change.

    Returns True on reindex, False when unchanged, None on missing document
    or failed write (or unknown index, implicitly).

    Fixes: `not a == b` → `a != b`; bare `except:` → `except Exception:`.
    """
    if index in class_map:
        try:
            document = es.get(index, "item", item)
        except ElasticHttpNotFoundError:
            return None
        data = document["_source"]
        version = document["_version"]
        data = check_data_format(data)
        currentmapping = data["_geordi"]["mapping"]
        mapping = get_index(index).map(data)
        if currentmapping["version"] != mapping["version"]:
            data["_geordi"]["mapping"] = mapping
            try:
                es.index(index, "item", data, id=item, es_version=version)
                return True
            except Exception:
                return None
        else:
            return False
def update_automatic_item_matches_by_index(index, item, data):
    """Run the index's automatic item matchers and register any results.

    Returns register_matches()'s result, or None for an unknown index.
    """
    if index not in class_map:
        return None
    formatted = check_data_format(data)
    found = get_index(index).automatic_item_matches(formatted)
    current_auto = formatted["_geordi"]["matchings"]["auto_matchings"]
    return register_matches(index, "item", item, found, current_auto)
def update_individual_subitem_matches_by_index(index, subitem, data):
    """Register automatic matches computed for a single subitem.

    Returns register_matches()'s result, or None for an unknown index.
    """
    if index not in class_map:
        return None
    subitem_data = check_data_format(data)
    found = get_index(index).individual_subitem_matches(subitem, subitem_data)
    current_auto = subitem_data['_geordi']['matchings']['auto_matchings']
    return register_matches(index, 'subitem', subitem, found, current_auto)
def update_individual_subitem_matches_by_index(index, subitem, data):
    """Compute and register the automatic matches for one subitem document.

    Returns whatever register_matches() reports; unknown indices yield None.
    """
    if index not in class_map:
        return None
    normalized = check_data_format(data)
    matched = get_index(index).individual_subitem_matches(subitem, normalized)
    auto_existing = normalized["_geordi"]["matchings"]["auto_matchings"]
    return register_matches(index, "subitem", subitem, matched, auto_existing)
def register_match(index, item, itemtype, matchtype, mbids, auto=False, user=None, ip=False):
    """Record a manual or automatic match of an item/subitem to MBIDs.

    Validates the MBIDs (unless unmatching), loads the target document,
    appends a match definition, and reindexes.  Always returns a Flask
    Response (JSON body plus a permissive CORS header).

    Fixes: the nine duplicated error-response stanzas are factored into a
    local helper; the final bare `except:` is narrowed to `except Exception:`.
    (The `return`/`response` statement in the auto/no-user branch is
    reconstructed as `return error_response`, matching every other branch.)
    """
    def _json_response(payload, status):
        # Shared JSON + CORS response construction used by every exit path.
        resp = Response(json.dumps(payload), status, mimetype="application/json")
        resp.headers.add('Access-Control-Allow-Origin', '*')
        return resp

    def _error(status, message):
        return _json_response({'code': status, 'error': message}, status)

    if matchtype != 'unmatch':
        if len(mbids) < 1:
            return _error(400, 'You must provide at least one MBID for a match.')
        # Check MBID formatting
        try:
            [uuid.UUID('{{{uuid}}}'.format(uuid=mbid)) for mbid in mbids]
        except ValueError:
            return _error(400, 'A provided MBID is ill-formed')
        # Verify each MBID exists in MusicBrainz and is of the claimed type.
        for mbid in mbids:
            check = check_type(mbid)
            if 'error' in check:
                return _error(400, 'MBID {} cannot be found in MusicBrainz'.format(mbid))
            elif check['type'] != matchtype:
                return _error(400, 'Provided match type {provided} doesn\'t match type {mbidtype} of {mbid}'.format(provided=matchtype, mbidtype=check['type'], mbid=mbid))
            else:
                continue
    # Retrieve document (or blank empty document for subitems)
    try:
        document = es.get(index, itemtype, item)
        data = document['_source']
        version = document['_version']
    except ElasticHttpNotFoundError:
        if itemtype == 'item':
            return _error(404, 'The provided item could not be found.')
        else:
            data = {}
            version = None
    data = check_data_format(data)
    if auto:
        if not user:
            return _error(400, 'Automatic matches must provide a name.')
        if not ip:
            # Prefer the forwarded-for chain's last hop; fall back to the
            # direct peer address.
            try:
                ip = request.environ['HTTP_X_FORWARDED_FOR'].split(',')[-1].strip()
            except KeyError:
                ip = request.environ['REMOTE_ADDR']
    else:
        user = current_user.id
        ip = False
    match = make_match_definition(user, matchtype, mbids, auto, ip)
    # Manual matches always become current; automatic matches only when no
    # manual match exists or the current one is itself automatic.
    if (
        (not auto
         or len(data['_geordi']['matchings']['matchings']) == 0
         or data['_geordi']['matchings']['current_matching']['auto'])
        and matchtype != 'unmatch'
    ):
        data['_geordi']['matchings']['current_matching'] = match
    if not auto:
        data['_geordi']['matchings']['matchings'].append(match)
    else:
        data['_geordi']['matchings']['auto_matchings'].append(match)
    if matchtype == 'unmatch':
        data['_geordi']['matchings']['current_matching'] = {}
    try:
        if version:
            # Conditional write against the version we read.
            es.index(index, itemtype, data, id=item, es_version=version)
        else:
            es.index(index, itemtype, data, id=item)
        return _json_response({'code': 200}, 200)
    except Exception:
        return _error(500, 'An unknown error happened while pushing to elasticsearch.')
def update_automatic_item_matches_by_index(index, item, data):
    """Compute this index's automatic matches for an item and register them.

    Returns register_matches()'s result; unknown indices yield None.
    """
    if index not in class_map:
        return None
    normalized = check_data_format(data)
    discovered = get_index(index).automatic_item_matches(normalized)
    auto_existing = normalized['_geordi']['matchings']['auto_matchings']
    return register_matches(index, 'item', item, discovered, auto_existing)
def register_match(index, item, itemtype, matchtype, mbids, auto=False, user=None, ip=False):
    """Register a match (manual or automatic) between an item/subitem and
    one or more MusicBrainz MBIDs, or clear the current match ('unmatch').

    Always returns a Flask JSON Response carrying a CORS header.

    Fixes: repeated Response-building boilerplate extracted into a nested
    helper; the trailing bare `except:` narrowed to `except Exception:`.
    (The `return`/`response` split in the auto/no-user branch is
    reconstructed as returning the error response, as all sibling branches do.)
    """
    def _respond(payload, status):
        # Every exit path emits JSON with Access-Control-Allow-Origin: *.
        r = Response(json.dumps(payload), status, mimetype="application/json")
        r.headers.add('Access-Control-Allow-Origin', '*')
        return r

    def _fail(status, message):
        return _respond({'code': status, 'error': message}, status)

    if matchtype != 'unmatch':
        if len(mbids) < 1:
            return _fail(400, 'You must provide at least one MBID for a match.')
        # Check MBID formatting
        try:
            [uuid.UUID('{{{uuid}}}'.format(uuid=mbid)) for mbid in mbids]
        except ValueError:
            return _fail(400, 'A provided MBID is ill-formed')
        # Each MBID must resolve in MusicBrainz and be of the claimed type.
        for mbid in mbids:
            check = check_type(mbid)
            if 'error' in check:
                return _fail(400, 'MBID {} cannot be found in MusicBrainz'.format(mbid))
            elif check['type'] != matchtype:
                return _fail(400, 'Provided match type {provided} doesn\'t match type {mbidtype} of {mbid}'.format(provided=matchtype, mbidtype=check['type'], mbid=mbid))
            else:
                continue
    # Retrieve document (or blank empty document for subitems)
    try:
        document = es.get(index, itemtype, item)
        data = document['_source']
        version = document['_version']
    except ElasticHttpNotFoundError:
        if itemtype == 'item':
            return _fail(404, 'The provided item could not be found.')
        else:
            data = {}
            version = None
    data = check_data_format(data)
    if auto:
        if not user:
            return _fail(400, 'Automatic matches must provide a name.')
        if not ip:
            # Last hop of the X-Forwarded-For chain, else the peer address.
            try:
                ip = request.environ['HTTP_X_FORWARDED_FOR'].split(',')[-1].strip()
            except KeyError:
                ip = request.environ['REMOTE_ADDR']
    else:
        user = current_user.id
        ip = False
    match = make_match_definition(user, matchtype, mbids, auto, ip)
    # Promote to current_matching unless a manual match already holds it.
    if ((not auto
         or len(data['_geordi']['matchings']['matchings']) == 0
         or data['_geordi']['matchings']['current_matching']['auto'])
            and matchtype != 'unmatch'):
        data['_geordi']['matchings']['current_matching'] = match
    if not auto:
        data['_geordi']['matchings']['matchings'].append(match)
    else:
        data['_geordi']['matchings']['auto_matchings'].append(match)
    if matchtype == 'unmatch':
        data['_geordi']['matchings']['current_matching'] = {}
    try:
        if version:
            es.index(index, itemtype, data, id=item, es_version=version)
        else:
            es.index(index, itemtype, data, id=item)
        return _respond({'code': 200}, 200)
    except Exception:
        return _fail(500, 'An unknown error happened while pushing to elasticsearch.')