def convert_bioentity(sessionmaker, link, chunk_size):
    """Bring the stored Bioentity rows in line with the objects served by the backend.

    Each object in the JSON returned by ``get_json(link())`` is turned into a
    Bioentity with ``create_bioentity`` and passed to ``create_or_update``
    together with the existing row that has the same id and the existing row
    that has the same unique key (either may be ``None``). The session is
    committed once per chunk of ``chunk_size`` objects. Existing rows that
    were matched by no incoming object are deleted at the end.

    Any ``Exception`` raised along the way is logged and swallowed; the
    session, if one was opened, is always closed.

    :param sessionmaker: zero-argument callable returning a session object
        (presumably a SQLAlchemy session factory -- confirm with the caller).
    :param link: zero-argument callable whose result is passed to ``get_json``.
    :param chunk_size: positive number of objects to process between commits.
    """
    log = logging.getLogger('convert.performance.bioentity')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause can tell whether a session
    # was ever opened; otherwise a failing sessionmaker() would make the
    # cleanup itself raise UnboundLocalError.
    session = None
    try:
        if chunk_size <= 0:
            # A negative size would yield zero chunks, after which every
            # existing row would be deleted as "untouched"; zero would divide
            # by zero. Fail early through the normal logging path instead.
            raise ValueError('chunk_size must be positive, got ' + str(chunk_size))

        session = sessionmaker()

        #Cache current objs
        from model_perf_schema.bioentity import Bioentity
        current_objs = session.query(Bioentity).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)
        untouched_obj_ids = set(id_to_current_obj)

        #Grab new objs from backend
        objs_json = get_json(link())
        count = len(objs_json)
        # int() because ceil may return a float (math.ceil does on Python 2),
        # which range() does not reliably accept.
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(num_chunks):
            min_id = i*chunk_size
            for obj_json in objs_json[min_id:min_id+chunk_size]:
                newly_created_obj = create_bioentity(obj_json)
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key,
                                 ['format_name', 'class_type', 'dbxref', 'json'],
                                 session, output_creator)

                # Anything matched by id or by key is still present upstream
                # and must survive the cleanup pass below.
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

            #Commit
            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            session.commit()

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        session.commit()
    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if session is not None:
            session.close()

    log.info('complete')
def convert_bibentry(sessionmaker, link, cls, chunk_size):
    """Bring the stored ``cls`` rows in line with the objects served by the backend.

    Each object in the JSON returned by ``get_json(link())`` is wrapped as
    ``cls(obj['id'], obj['text'])`` and passed to ``create_or_update``
    together with the existing row that has the same id (or ``None``). The
    session is committed once per chunk of ``chunk_size`` objects. Existing
    rows that were matched by no incoming object are deleted at the end.

    Any ``Exception`` raised along the way is logged and swallowed; the
    session, if one was opened, is always closed.

    :param sessionmaker: zero-argument callable returning a session object
        (presumably a SQLAlchemy session factory -- confirm with the caller).
    :param link: zero-argument callable whose result is passed to ``get_json``.
    :param cls: class that is both queried through the session and
        instantiated as ``cls(id, text)``; its ``__name__`` names the logger.
    :param chunk_size: positive number of objects to process between commits.
    """
    log = logging.getLogger('convert.performance.' + cls.__name__)
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause can tell whether a session
    # was ever opened; otherwise a failing sessionmaker() would make the
    # cleanup itself raise UnboundLocalError.
    session = None
    try:
        if chunk_size <= 0:
            # A negative size would yield zero chunks, after which every
            # existing row would be deleted as "untouched"; zero would divide
            # by zero. Fail early through the normal logging path instead.
            raise ValueError('chunk_size must be positive, got ' + str(chunk_size))

        session = sessionmaker()

        #Cache current objs
        current_objs = session.query(cls).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        untouched_obj_ids = set(id_to_current_obj)

        #Grab new objs from backend
        objs_json = get_json(link())
        count = len(objs_json)
        # int() because ceil may return a float (math.ceil does on Python 2),
        # which range() does not reliably accept.
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(num_chunks):
            min_id = i*chunk_size
            for obj_json in objs_json[min_id:min_id+chunk_size]:
                newly_created_obj = cls(obj_json['id'], obj_json['text'])
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                # The id match is passed for both the by-id and by-key slots.
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_id,
                                 ['json'], session, output_creator)

                # A matched row is still present upstream and must survive
                # the cleanup pass below.
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)

            #Commit
            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            session.commit()

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        session.commit()
    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if session is not None:
            session.close()

    log.info('complete')
def convert_by_bioentity(sessionmaker, link, cls, bioents):
    """Rebuild one ``cls`` row per bioentity from JSON fetched from the backend.

    For every item of ``bioents`` the JSON returned by
    ``get_json(link(bioent.format_name, bioent.class_type))`` is serialised
    with ``json.dumps`` and wrapped as ``cls(bioent.id, <json string>)``, then
    passed to ``create_or_update`` together with the existing row that has the
    same id (or ``None``). The session is committed after every 1000
    bioentities and once more at the end. Existing rows that were matched by
    no bioentity are deleted before the final commit.

    Any ``Exception`` raised along the way is logged and swallowed; the
    session, if one was opened, is always closed.

    :param sessionmaker: zero-argument callable returning a session object
        (presumably a SQLAlchemy session factory -- confirm with the caller).
    :param link: callable taking ``(format_name, class_type)``; its result is
        passed to ``get_json``.
    :param cls: class that is both queried through the session and
        instantiated as ``cls(id, json_text)``; its ``__name__`` names the logger.
    :param bioents: iterable of objects exposing ``id``, ``format_name`` and
        ``class_type``.
    """
    log = logging.getLogger('convert.performance.' + cls.__name__)
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause can tell whether a session
    # was ever opened; otherwise a failing sessionmaker() would make the
    # cleanup itself raise UnboundLocalError.
    session = None
    try:
        session = sessionmaker()

        #Cache current objs
        current_objs = session.query(cls).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        untouched_obj_ids = set(id_to_current_obj)

        #Grab new objs from backend
        for processed, bioent in enumerate(bioents, 1):
            json_obj = get_json(link(bioent.format_name, bioent.class_type))
            newly_created_obj = cls(bioent.id, json.dumps(json_obj))
            current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
            # The id match is passed for both the by-id and by-key slots.
            create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_id,
                             ['json'], session, output_creator)

            # A matched row still has a bioentity behind it and must survive
            # the cleanup pass below.
            if current_obj_by_id is not None:
                untouched_obj_ids.discard(current_obj_by_id.id)

            # Report progress and commit in batches of 1000.
            if processed % 1000 == 0:
                output_creator.finished(str(processed))
                session.commit()

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        session.commit()
    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if session is not None:
            session.close()

    log.info('complete')