Example #1
0
def convert_bioentity(sessionmaker, link, chunk_size):
    """Bring the stored Bioentity rows in line with the backend's JSON list.

    The function loads every existing ``Bioentity`` row, fetches the list of
    JSON objects from ``get_json(link())``, and walks that list in slices of
    ``chunk_size``.  Each JSON object is turned into a candidate object via
    ``create_bioentity`` and handed to ``create_or_update`` together with the
    existing row matching its id and the existing row matching its
    ``unique_key()`` (either may be ``None``).  The session is committed after
    every slice.  Rows that were never matched by id or by key are deleted at
    the end, followed by a final commit.

    Any exception raised while doing this is logged and swallowed; the
    function always logs 'complete' and returns ``None``.

    :param sessionmaker: zero-argument callable returning a session object
        exposing ``query``, ``delete``, ``commit`` and ``close``.
    :param link: zero-argument callable whose result is passed to ``get_json``.
    :param chunk_size: number of JSON objects processed per commit.
    """
    log = logging.getLogger('convert.performance.bioentity')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so that the finally clause can tell whether a
    # session was ever opened: sessionmaker() itself may raise, and closing an
    # unbound name would replace the logged error with a new one.
    session = None
    try:
        session = sessionmaker()

        #Cache current objs
        from model_perf_schema.bioentity import Bioentity
        current_objs = session.query(Bioentity).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        # Ids still in this set after the import loop belong to rows that the
        # backend no longer reports.
        untouched_obj_ids = set(id_to_current_obj)

        #Grab new objs from backend
        objs_json = get_json(link())

        count = len(objs_json)
        # Integer ceiling division: keeps num_chunks an int so it is a valid
        # range() bound regardless of what ceil() would have returned.
        num_chunks = (count + chunk_size - 1) // chunk_size
        for i in range(num_chunks):
            min_id = i * chunk_size
            for obj_json in objs_json[min_id:min_id + chunk_size]:
                newly_created_obj = create_bioentity(obj_json)
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, ['format_name', 'class_type', 'dbxref', 'json'], session, output_creator)

                # The id match and the key match may be different rows; both
                # count as "seen" and must survive the clean-up below.
                for matched_obj in (current_obj_by_id, current_obj_by_key):
                    if matched_obj is not None:
                        untouched_obj_ids.discard(matched_obj.id)

            #Commit
            output_creator.finished(str(i + 1) + "/" + str(num_chunks))
            session.commit()

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if session is not None:
            session.close()

    log.info('complete')
Example #2
0
def convert_bibentry(sessionmaker, link, cls, chunk_size):
    """Bring the stored ``cls`` rows in line with the backend's JSON list.

    The function loads every existing ``cls`` row, fetches the list of JSON
    objects from ``get_json(link())``, and walks that list in slices of
    ``chunk_size``.  For each JSON object a candidate is built as
    ``cls(obj_json['id'], obj_json['text'])`` and handed to
    ``create_or_update`` together with the existing row of the same id (or
    ``None``).  The session is committed after every slice.  Rows whose id
    never appeared in the backend list are deleted at the end, followed by a
    final commit.

    Any exception raised while doing this is logged and swallowed; the
    function always logs 'complete' and returns ``None``.

    :param sessionmaker: zero-argument callable returning a session object
        exposing ``query``, ``delete``, ``commit`` and ``close``.
    :param link: zero-argument callable whose result is passed to ``get_json``.
    :param cls: mapped class to synchronise; constructed with ``(id, text)``
        and also used to name the logger.
    :param chunk_size: number of JSON objects processed per commit.
    """
    log = logging.getLogger('convert.performance.' + cls.__name__)
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so that the finally clause can tell whether a
    # session was ever opened: sessionmaker() itself may raise, and closing an
    # unbound name would replace the logged error with a new one.
    session = None
    try:
        session = sessionmaker()

        #Cache current objs
        current_objs = session.query(cls).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)

        # Ids still in this set after the import loop belong to rows that the
        # backend no longer reports.
        untouched_obj_ids = set(id_to_current_obj)

        #Grab new objs from backend
        objs_json = get_json(link())

        count = len(objs_json)
        # Integer ceiling division: keeps num_chunks an int so it is a valid
        # range() bound regardless of what ceil() would have returned.
        num_chunks = (count + chunk_size - 1) // chunk_size
        for i in range(num_chunks):
            min_id = i * chunk_size
            for obj_json in objs_json[min_id:min_id + chunk_size]:
                newly_created_obj = cls(obj_json['id'], obj_json['text'])
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                # The same row is passed as both the by-id and by-key match.
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_id, ['json'], session, output_creator)

                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)

            #Commit
            output_creator.finished(str(i + 1) + "/" + str(num_chunks))
            session.commit()

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if session is not None:
            session.close()

    log.info('complete')
Example #3
0
def convert_by_bioentity(sessionmaker, link, cls, bioents):
    """Rebuild one ``cls`` row per bioentity from per-bioentity backend JSON.

    The function loads every existing ``cls`` row, then for each item of
    ``bioents`` fetches ``get_json(link(bioent.format_name,
    bioent.class_type))``, builds ``cls(bioent.id, json.dumps(<that JSON>))``
    and hands it to ``create_or_update`` together with the existing row of the
    same id (or ``None``).  The session is committed after every 1000
    bioentities.  Rows whose id was never matched are deleted at the end, and
    a single final commit flushes both the deletions and whatever is left of
    the last partial batch.

    Any exception raised while doing this is logged and swallowed; the
    function always logs 'complete' and returns ``None``.

    :param sessionmaker: zero-argument callable returning a session object
        exposing ``query``, ``delete``, ``commit`` and ``close``.
    :param link: callable taking ``(format_name, class_type)``; its result is
        passed to ``get_json``.
    :param cls: mapped class to synchronise; constructed with ``(id, json)``
        and also used to name the logger.
    :param bioents: iterable of objects exposing ``id``, ``format_name`` and
        ``class_type``.
    """
    log = logging.getLogger('convert.performance.' + cls.__name__)
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so that the finally clause can tell whether a
    # session was ever opened: sessionmaker() itself may raise, and closing an
    # unbound name would replace the logged error with a new one.
    session = None
    try:
        session = sessionmaker()

        #Cache current objs
        current_objs = session.query(cls).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)

        # Ids still in this set after the import loop belong to rows that were
        # not matched by any bioentity in this run.
        untouched_obj_ids = set(id_to_current_obj)

        #Grab new objs from backend
        for i, bioent in enumerate(bioents, 1):
            json_obj = get_json(link(bioent.format_name, bioent.class_type))
            newly_created_obj = cls(bioent.id, json.dumps(json_obj))
            current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
            # The same row is passed as both the by-id and by-key match.
            create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_id, ['json'], session, output_creator)

            if current_obj_by_id is not None:
                untouched_obj_ids.discard(current_obj_by_id.id)

            # Commit in batches of 1000 bioentities.
            if i % 1000 == 0:
                output_creator.finished(str(i))
                session.commit()

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        # Must run exactly once, after the delete loop: it is the only commit
        # covering the tail of the import (fewer than 1000 items since the
        # last batch commit) and it has to happen even when nothing was
        # deleted.
        output_creator.finished()
        session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if session is not None:
            session.close()

    log.info('complete')