def _create_distance_signature(signatures_map, uuid):
    """Build a publication-enriched signature for a similarity measurement.

    The signature stored under ``uuid`` is copied (via its ``copy()``
    method), so the entry held in ``signatures_map`` is not modified, and
    the record built by ``create_beard_record`` from the signature's
    ``publication_id`` is attached to the copy under the ``publication``
    key. The result is meant to be compared against claimed signatures.

    :param signatures_map: A mapping from signature UUID to the signature
        itself. Each signature must provide ``get`` and ``copy``
        (presumably a plain dictionary - confirm against the caller).
    :param uuid: A string representing UUID of a given signature.

        Example:
            uuid = 'd63537a8-1df4-4436-b5ed-224da5b5028c'

    :return: A copy of the signature extended with a ``publication`` key.

        Example:
            {u'affiliations': u'Yerevan Phys. Inst.',
             u'publication_id': u'13c3cca8-b0bf-42f5-90d4-e3dfcced0511',
             u'full_name': u'Chatrchyan, Serguei',
             u'uuid': u'd63537a8-1df4-4436-b5ed-224da5b5028c',
             u'publication': <value returned by create_beard_record>}

    :raises KeyError: If ``uuid`` is missing from a dict-like
        ``signatures_map``.
    """
    stored_signature = signatures_map[uuid]

    # ``get`` is used on purpose: a signature without a ``publication_id``
    # passes ``None`` on to ``create_beard_record`` instead of failing here.
    publication_record = create_beard_record(
        stored_signature.get('publication_id'))

    distance_signature = stored_signature.copy()
    distance_signature['publication'] = publication_record
    return distance_signature
def _create_distance_signature(signatures_map, uuid):
    """Return a copy of a signature with its publication record attached.

    Given a UUID, the function looks the signature up in
    ``signatures_map``, builds a record from the signature's
    ``publication_id`` with ``create_beard_record`` and returns a copy of
    the signature carrying that record under ``publication``. The returned
    signature is used to calculate the similarity against claimed
    signatures. The signature kept in ``signatures_map`` is left untouched.

    :param signatures_map: A mapping of signature UUIDs to signatures.
        Signatures are expected to offer ``get`` and ``copy`` (presumably
        plain dictionaries - confirm against the caller).
    :param uuid: A string representing UUID of a given signature.

        Example:
            uuid = 'd63537a8-1df4-4436-b5ed-224da5b5028c'

    :return: The copied signature, including the ``publication`` key.

        Example:
            {u'affiliations': u'Yerevan Phys. Inst.',
             u'publication_id': u'13c3cca8-b0bf-42f5-90d4-e3dfcced0511',
             u'full_name': u'Chatrchyan, Serguei',
             u'uuid': u'd63537a8-1df4-4436-b5ed-224da5b5028c',
             u'publication': <value returned by create_beard_record>}

    :raises KeyError: If ``uuid`` is missing from a dict-like
        ``signatures_map``.
    """
    signature = signatures_map[uuid]
    # A missing ``publication_id`` is forwarded as ``None``.
    publication = create_beard_record(signature.get('publication_id'))

    enriched_signature = signature.copy()
    enriched_signature['publication'] = publication
    return enriched_signature
def disambiguation_clustering(phonetic_block):
    """Cluster the signatures belonging to a single phonetic block.

    The function collects every record containing the given phonetic
    block, converts the records and their signatures to the format
    required by Beard, dispatches the clustering job and then processes
    the returned clusters: signatures matched to an existing profile are
    processed together with that profile's recid, the remaining clusters
    are processed without one. All changes are committed in a single
    database transaction.

    :param phonetic_block: The phonetic block whose signatures should be
        clustered.
    :return: None. The function works through ``process_clusters`` and the
        database session.

    The database session is always closed, also when an error is raised
    before the commit.
    """
    try:
        # Let the logger do the interpolation (lazy %-style arguments).
        logger.info("Clustering: %s", phonetic_block)

        records = []
        signatures = []

        # Get all the records containing specific phonetic block.
        records_ids = get_records_from_block(phonetic_block)

        # Create records and signatures in Beard readable format.
        for record_id in records_ids:
            records.append(create_beard_record(record_id))
            signatures.extend(
                create_beard_signatures(record_id, phonetic_block))

        # Dispatch clustering job to Beard Celery service.
        try:
            clusters_matched, clusters_created = make_beard_clusters(
                records, signatures).get()
        except AttributeError:
            # Best-effort fallback kept from the original code: presumably
            # ``make_beard_clusters`` may return an object without ``get``
            # (e.g. ``None``) - TODO confirm. Nothing is clustered then.
            clusters_matched = {}
            clusters_created = {}

        # Update recids of signatures to existing profiles.
        # ``items()`` is used instead of the Python 2 only ``iteritems()``
        # so that the loop works on both Python 2 and Python 3.
        if clusters_matched:
            for profile_recid, beard_uuids in clusters_matched.items():
                process_clusters(beard_uuids, signatures, profile_recid)

        # Create new profiles.
        if clusters_created:
            for beard_uuids in clusters_created.values():
                process_clusters(beard_uuids, signatures)

        db.session.commit()
    finally:
        db.session.close()
def disambiguation_clustering(phonetic_block):
    """Cluster the signatures belonging to a single phonetic block.

    The function collects every record containing the given phonetic
    block, converts the records and their signatures to the format
    required by Beard, dispatches the clustering job and then processes
    the returned clusters: signatures matched to an existing profile are
    processed together with that profile's recid, the remaining clusters
    are processed without one. All changes are committed in a single
    database transaction.

    :param phonetic_block: The phonetic block whose signatures should be
        clustered.
    :return: None. The function works through ``process_clusters`` and the
        database session.

    The database session is always closed, also when an error is raised
    before the commit.
    """
    try:
        # Let the logger do the interpolation (lazy %-style arguments).
        logger.info("Clustering: %s", phonetic_block)

        records = []
        signatures = []

        # Get all the records containing specific phonetic block.
        records_ids = get_records_from_block(phonetic_block)

        # Create records and signatures in Beard readable format.
        for record_id in records_ids:
            records.append(create_beard_record(record_id))
            signatures.extend(
                create_beard_signatures(record_id, phonetic_block))

        # Dispatch clustering job to Beard Celery service.
        try:
            clusters_matched, clusters_created = make_beard_clusters(
                records, signatures).get()
        except AttributeError:
            # Best-effort fallback kept from the original code: presumably
            # ``make_beard_clusters`` may return an object without ``get``
            # (e.g. ``None``) - TODO confirm. Nothing is clustered then.
            clusters_matched = {}
            clusters_created = {}

        # Update recids of signatures to existing profiles.
        # ``items()`` is used instead of the Python 2 only ``iteritems()``
        # so that the loop works on both Python 2 and Python 3.
        if clusters_matched:
            for profile_recid, beard_uuids in clusters_matched.items():
                process_clusters(beard_uuids, signatures, profile_recid)

        # Create new profiles.
        if clusters_created:
            for beard_uuids in clusters_created.values():
                process_clusters(beard_uuids, signatures)

        db.session.commit()
    finally:
        db.session.close()