def convert_qualifier_evidence(old_session_maker, new_session_maker):
    """Synchronise Qualifierevidence rows in the new schema with old-schema Features.

    Every old Feature is converted with ``create_qualifier_evidence`` and each
    resulting object is created or updated through ``create_or_update``.
    Existing Qualifierevidence rows that were matched by neither id nor unique
    key are deleted, and the work is committed once at the end.

    Any exception is logged and swallowed; whichever sessions were opened are
    closed before returning.

    :param old_session_maker: zero-argument callable returning an old-schema session.
    :param new_session_maker: zero-argument callable returning a new-schema session.
    """
    from model_new_schema.bioentity import Qualifierevidence as NewQualifierevidence, Bioentity as NewBioentity
    from model_old_schema.feature import Feature as OldFeature

    log = logging.getLogger('convert.bioentity_in_depth.qualifier_evidence')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    old_session = None
    try:
        #Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(NewQualifierevidence).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Values to check
        values_to_check = ['reference_id', 'experiment_id', 'strain', 'source', 'date_created', 'created_by',
                           'bioentity_id', 'qualifier']

        #Ids of current objects not (yet) matched by any converted object
        untouched_obj_ids = set(id_to_current_obj.keys())

        #Grab cached dictionaries
        id_to_bioentity = dict((x.id, x) for x in new_session.query(NewBioentity).all())

        #Grab old objects
        old_session = old_session_maker()
        old_objs = old_session.query(OldFeature).options(joinedload('annotation')).all()

        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_qualifier_evidence(old_obj, id_to_bioentity)
            if newly_created_objs is None:
                continue

            #Edit or add new objects
            for newly_created_obj in newly_created_objs:
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                #The set holds ids, so membership must be tested with the id,
                #not the object; otherwise every existing row would be deleted below.
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
def convert_reftype(old_session_maker, new_session_maker):
    """Synchronise Reftype rows in the new schema with old-schema RefReftypes.

    Every old RefReftype is converted with ``create_reftype`` and each
    resulting object is created or updated through ``create_or_update``.
    Existing Reftype rows matched by neither id nor unique key are deleted,
    and the work is committed once at the end.

    Any exception is logged and swallowed; whichever sessions were opened are
    closed before returning.

    :param old_session_maker: zero-argument callable returning an old-schema session.
    :param new_session_maker: zero-argument callable returning a new-schema session.
    """
    from model_new_schema.reference import Reference as NewReference, Reftype as NewReftype
    from model_old_schema.reference import RefReftype as OldRefReftype

    log = logging.getLogger('convert.reference_in_depth.reftype')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    old_session = None
    try:
        #Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(NewReftype).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Grab old objects
        old_session = old_session_maker()
        old_objs = old_session.query(OldRefReftype).options(joinedload('reftype')).all()

        #Values to check
        values_to_check = ['source']

        #Ids of current objects not (yet) matched by any converted object
        untouched_obj_ids = set(id_to_current_obj.keys())

        #Grab cached dictionaries
        reference_ids = set(x.id for x in new_session.query(NewReference).all())

        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_reftype(old_obj, reference_ids)
            if newly_created_objs is None:
                continue

            #Edit or add new objects
            for newly_created_obj in newly_created_objs:
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                #Whatever was matched is still in use and must survive the cleanup
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 3
0
def convert_locus(old_session_maker, new_session_maker):
    """Synchronise Locus rows in the new schema with old-schema Features.

    Every old Feature is converted with ``create_locus`` and each resulting
    object is created or updated through ``create_or_update``. Existing Locus
    rows matched by neither id nor unique key are deleted, and the work is
    committed once at the end.

    Any exception is logged and swallowed; whichever sessions were opened are
    closed before returning.

    :param old_session_maker: zero-argument callable returning an old-schema session.
    :param new_session_maker: zero-argument callable returning a new-schema session.
    """
    from model_new_schema.bioentity import Locus as NewLocus
    from model_old_schema.feature import Feature as OldFeature

    log = logging.getLogger('convert.bioentity.locus')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    old_session = None
    try:
        #Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(NewLocus).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Values to check
        values_to_check = ['display_name', 'link', 'source', 'status', 'date_created', 'created_by',
                           'attribute', 'name_description', 'headline', 'description', 'dbxref',
                           'genetic_position', 'locus_type']

        #Ids of current objects not (yet) matched by any converted object
        untouched_obj_ids = set(id_to_current_obj.keys())

        #Grab old objects
        old_session = old_session_maker()
        old_objs = old_session.query(OldFeature).options(joinedload('annotation')).all()

        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_locus(old_obj)
            if newly_created_objs is None:
                continue

            #Edit or add new objects
            for newly_created_obj in newly_created_objs:
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                #Whatever was matched is still in use and must survive the cleanup
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 4
0
def convert_paragraph(new_session_maker,
                      paragraph_file='/Users/kpaskov/final/Reg_Summary_Paragraphs04282013.txt'):
    """Synchronise REGULATION Paragraph rows with the regulation summary file.

    The file is split into records with ``break_up_file``; each record is
    converted with ``create_paragraph`` and the resulting objects are created
    or updated through ``create_or_update``. Existing Paragraph rows whose
    ``class_type`` is 'REGULATION' and that were matched by neither id nor
    unique key are deleted, and the work is committed once at the end.

    Any exception is logged and swallowed; the session, if it was opened, is
    closed before returning.

    :param new_session_maker: zero-argument callable returning a new-schema session.
    :param paragraph_file: path of the summary-paragraph file to load; defaults
        to the location that used to be hard-coded here.
    """
    from model_new_schema.bioentity import Bioentity, Paragraph

    log = logging.getLogger('convert.regulation.paragraph')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    try:
        new_session = new_session_maker()

        #Values to check
        values_to_check = ['text', 'date_created', 'created_by']

        #Grab cached dictionaries
        key_to_bioentity = dict((x.unique_key(), x) for x in new_session.query(Bioentity).all())

        #Grab all current objects
        current_objs = new_session.query(Paragraph).filter(Paragraph.class_type == 'REGULATION').all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Ids of current objects not (yet) matched by any converted object
        untouched_obj_ids = set(id_to_current_obj.keys())

        old_objs = break_up_file(paragraph_file)
        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_paragraph(old_obj, key_to_bioentity)
            if newly_created_objs is None:
                continue

            #Edit or add new objects
            for newly_created_obj in newly_created_objs:
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                #Whatever was matched is still in use and must survive the cleanup
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
Ejemplo n.º 5
0
def convert_strain(old_session_maker, new_session_maker):
    """Synchronise Strain rows in the new schema with old-schema CVTerms.

    Old CVTerms with ``cv_no == 10`` are converted with ``create_strain`` and
    each resulting object is created or updated through ``create_or_update``.
    Existing Strain rows matched by neither id nor unique key are deleted, and
    the work is committed once at the end.

    Any exception is logged and swallowed; whichever sessions were opened are
    closed before returning.

    :param old_session_maker: zero-argument callable returning an old-schema session.
    :param new_session_maker: zero-argument callable returning a new-schema session.
    """
    from model_new_schema.evelement import Strain as NewStrain
    from model_old_schema.cv import CVTerm as OldCVTerm

    log = logging.getLogger('convert.evelements.strain')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    old_session = None
    try:
        #Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(NewStrain).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Values to check
        values_to_check = ['display_name', 'link', 'description', 'date_created', 'created_by']

        #Ids of current objects not (yet) matched by any converted object
        untouched_obj_ids = set(id_to_current_obj.keys())

        #Grab old objects
        old_session = old_session_maker()
        old_objs = old_session.query(OldCVTerm).filter(OldCVTerm.cv_no==10).all()

        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_strain(old_obj)
            if newly_created_objs is None:
                continue

            #Edit or add new objects
            for newly_created_obj in newly_created_objs:
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                #Whatever was matched is still in use and must survive the cleanup
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 6
0
def convert_book(old_session_maker, new_session_maker):
    """Synchronise Book rows in the new schema with old-schema Books.

    Every old Book is converted with ``create_book`` and each resulting object
    is created or updated through ``create_or_update``. Existing new-schema
    Book rows matched by neither id nor unique key are deleted, and the work
    is committed once at the end.

    Any exception is logged and swallowed; whichever sessions were opened are
    closed before returning.

    :param old_session_maker: zero-argument callable returning an old-schema session.
    :param new_session_maker: zero-argument callable returning a new-schema session.
    """
    from model_new_schema.reference import Book as NewBook
    from model_old_schema.reference import Book as OldBook

    log = logging.getLogger('convert.reference.book')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    old_session = None
    try:
        #Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(NewBook).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Values to check
        values_to_check = ['isbn', 'total_pages', 'publisher', 'publisher_location', 'created_by', 'date_created']

        #Ids of current objects not (yet) matched by any converted object
        untouched_obj_ids = set(id_to_current_obj.keys())

        #Grab old objects
        old_session = old_session_maker()
        old_objs = old_session.query(OldBook).all()

        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_book(old_obj)
            if newly_created_objs is None:
                continue

            #Edit or add new objects
            for newly_created_obj in newly_created_objs:
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                #Whatever was matched is still in use and must survive the cleanup
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
def convert_bioentitytabs(new_session_maker):
    """Synchronise Bioentitytabs rows with the Locus rows of the new schema.

    Every Locus is converted with ``create_bioentitytabs`` and each resulting
    object is created or updated through ``create_or_update``. Existing
    Bioentitytabs rows matched by neither id nor unique key are deleted, and
    the work is committed once at the end.

    Both the existing rows and the source Locus rows are read through one
    session, so the objects handed to ``delete`` belong to the session that
    commits and closes.

    Any exception is logged and swallowed; the session, if it was opened, is
    closed before returning.

    :param new_session_maker: zero-argument callable returning a new-schema session.
    """
    from model_new_schema.bioentity import Locus, Bioentitytabs

    log = logging.getLogger('convert.bioentity_in_depth.bioentitytabs')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    try:
        #Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(Bioentitytabs).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Values to check
        values_to_check = ['summary', 'history', 'literature', 'go', 'phenotype', 'interactions', 'expression',
                           'regulation', 'protein', 'wiki']

        #Ids of current objects not (yet) matched by any converted object
        untouched_obj_ids = set(id_to_current_obj.keys())

        #Grab old objects (the source here is the new schema's own Locus table).
        #Reuse the session opened above: opening another one would leak the
        #first and leave current_objs attached to a session that never commits.
        old_objs = new_session.query(Locus).all()

        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_bioentitytabs(old_obj)
            if newly_created_objs is None:
                continue

            #Edit or add new objects
            for newly_created_obj in newly_created_objs:
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                #Whatever was matched is still in use and must survive the cleanup
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
def convert_author_reference(old_session_maker, new_session_maker, chunk_size):
    """Synchronise AuthorReference rows with the old schema, one id range at a time.

    The old AuthorReference id space ``0 .. max(id)`` is walked in windows of
    ``chunk_size`` ids. Within each window the old rows are converted with
    ``create_author_reference`` and created or updated through
    ``create_or_update``; existing new-schema rows in the same id window that
    were matched by neither id nor unique key are deleted. Each window is
    committed separately. A unique key is processed only the first time it is
    seen across the whole run.

    Any exception is logged and swallowed; whichever sessions were opened are
    closed before returning.

    :param old_session_maker: zero-argument callable returning an old-schema session.
    :param new_session_maker: zero-argument callable returning a new-schema session.
    :param chunk_size: number of consecutive ids handled per window/commit.
    """
    from model_new_schema.reference import Author as NewAuthor, Reference as NewReference, AuthorReference as NewAuthorReference
    from model_old_schema.reference import AuthorReference as OldAuthorReference, Author as OldAuthor

    log = logging.getLogger('convert.reference_in_depth.author_reference')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        #Values to check
        values_to_check = ['author_type']

        #Grab cached dictionaries
        reference_ids = set(x.id for x in new_session.query(NewReference).all())

        #Simplify author conversion: old author id -> format name -> new author id
        old_id_to_key = dict((x.id, create_format_name(x.name)) for x in old_session.query(OldAuthor).all())
        new_key_to_id = dict((x.unique_key(), x.id) for x in new_session.query(NewAuthor).all())
        old_id_to_new_id_author = dict((x, new_key_to_id[y]) for x, y in old_id_to_key.iteritems())

        used_unique_keys = set()

        max_old_id = old_session.query(func.max(OldAuthorReference.id)).first()[0]
        if max_old_id is None:
            #Empty source table: func.max yields None, so there is nothing to walk
            num_chunks = 0
        else:
            #Windows must cover ids 0..max_old_id inclusive, hence the +1; the
            #int() is needed because ceil() returns a float, which range() rejects.
            num_chunks = int(ceil(1.0*(max_old_id + 1)/chunk_size))

        for i in range(0, num_chunks):
            min_id = i*chunk_size
            max_id = min_id + chunk_size

            #Grab all current objects
            current_objs = new_session.query(NewAuthorReference).filter(
                                            NewAuthorReference.id >= min_id).filter(
                                            NewAuthorReference.id < max_id).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            #Ids of current objects in this window not (yet) matched
            untouched_obj_ids = set(id_to_current_obj.keys())

            #Grab old objects
            old_objs = old_session.query(OldAuthorReference).filter(
                                            OldAuthorReference.id >= min_id).filter(
                                            OldAuthorReference.id < max_id).all()

            for old_obj in old_objs:
                #Convert old objects into new ones
                newly_created_objs = create_author_reference(old_obj, old_id_to_new_id_author, reference_ids)
                if newly_created_objs is None:
                    continue

                #Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    unique_key = newly_created_obj.unique_key()
                    if unique_key in used_unique_keys:
                        #Already handled earlier in this run
                        continue

                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(unique_key)
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                    #Whatever was matched is still in use and must survive the cleanup
                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)
                    used_unique_keys.add(unique_key)

            #Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 9
0
def convert_evidence(new_session_maker, chunk_size,
                     regulation_file='/Users/kpaskov/final/yeastmine_regulation.tsv'):
    """Synchronise Regulationevidence rows with the regulation data file, in chunks.

    The file is split into rows with ``break_up_file`` and processed
    ``chunk_size`` rows at a time. Each row is converted with
    ``create_evidence`` (which also receives the row's running index) and the
    resulting objects are created or updated through ``create_or_update``.
    For every chunk, existing rows whose id lies between
    ``create_evidence_id(start)`` and ``create_evidence_id(start + chunk_size)``
    and that were matched by neither id nor unique key are deleted, and the
    chunk is committed.

    Any exception is logged and swallowed; the session, if it was opened, is
    closed before returning.

    :param new_session_maker: zero-argument callable returning a new-schema session.
    :param chunk_size: number of file rows handled per chunk/commit.
    :param regulation_file: path of the regulation file to load; defaults to
        the location that used to be hard-coded here.
    """
    from model_new_schema.regulation import Regulationevidence
    from model_new_schema.evelement import Experiment
    from model_new_schema.bioentity import Bioentity
    from model_new_schema.reference import Reference

    log = logging.getLogger('convert.regulation.evidence')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    try:
        new_session = new_session_maker()

        #Values to check
        values_to_check = ['experiment_id', 'reference_id', 'strain_id', 'source', 'conditions',
                           'bioentity1_id', 'bioentity2_id', 'date_created', 'created_by']

        #Grab cached dictionaries
        key_to_experiment = dict((x.unique_key(), x) for x in new_session.query(Experiment).all())
        key_to_bioent = dict((x.unique_key(), x) for x in new_session.query(Bioentity).all())
        pubmed_to_reference_id = dict((x.pubmed_id, x.id) for x in new_session.query(Reference).all())

        #Grab old objects
        data = break_up_file(regulation_file)

        count = len(data)
        #ceil() returns a float, which range() rejects; convert it once here
        num_chunks = int(ceil(1.0*count/chunk_size))
        min_id = 0
        j = 0   #running index of the row within the whole file
        for i in range(0, num_chunks):
            #Grab all current objects
            current_objs = new_session.query(Regulationevidence).filter(
                                            Regulationevidence.id >= create_evidence_id(min_id)).filter(
                                            Regulationevidence.id < create_evidence_id(min_id+chunk_size)).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            #Ids of current objects in this chunk not (yet) matched
            untouched_obj_ids = set(id_to_current_obj.keys())

            old_objs = data[min_id:min_id+chunk_size]

            for old_obj in old_objs:
                #Convert old objects into new ones
                newly_created_objs = create_evidence(old_obj, j, key_to_experiment, key_to_bioent, pubmed_to_reference_id)

                if newly_created_objs is not None:
                    #Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                        current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                        create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                        #Whatever was matched is still in use and must survive the cleanup
                        if current_obj_by_id is not None:
                            untouched_obj_ids.discard(current_obj_by_id.id)
                        if current_obj_by_key is not None:
                            untouched_obj_ids.discard(current_obj_by_key.id)
                #Advances for every row, including rows that produced nothing
                j = j + 1

            #Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
Ejemplo n.º 10
0
def convert_experiment(old_session_maker, new_session_maker,
                       regulation_file='/Users/kpaskov/final/yeastmine_regulation.tsv',
                       binding_file='/Users/kpaskov/final/yetfasco_data.txt'):
    """Synchronise Experiment rows with three sources of experiments.

    Sources, processed in this order:

    1. old-schema CVTerms with ``cv_no == 7`` (``create_experiment``);
    2. distinct (column 4, column 5) pairs of the regulation file
       (``create_experiment_from_reg_row``);
    3. distinct values of column 9 of the ';'-delimited binding file, with the
       first and last character stripped (``create_experiment_from_binding_row``).

    Every converted object is created or updated through ``create_or_update``.
    Existing Experiment rows matched by neither id nor unique key are deleted,
    and the work is committed once at the end.

    Any exception is logged and swallowed; whichever sessions were opened are
    closed before returning.

    :param old_session_maker: zero-argument callable returning an old-schema session.
    :param new_session_maker: zero-argument callable returning a new-schema session.
    :param regulation_file: path of the regulation file; defaults to the
        location that used to be hard-coded here.
    :param binding_file: path of the binding file; defaults to the location
        that used to be hard-coded here.
    """
    from model_new_schema.evelement import Experiment as NewExperiment
    from model_old_schema.cv import CVTerm as OldCVTerm

    log = logging.getLogger('convert.evelements.experiment')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause never touches an unbound name
    new_session = None
    old_session = None
    try:
        #Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(NewExperiment).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Values to check
        values_to_check = ['display_name', 'link', 'description', 'date_created', 'created_by', 'eco_id']

        #Ids of current objects not (yet) matched by any converted object
        untouched_obj_ids = set(id_to_current_obj.keys())

        def reconcile(newly_created_obj):
            #Create/update one converted object and mark what it matched as still in use
            current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
            current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
            create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

            if current_obj_by_id is not None:
                untouched_obj_ids.discard(current_obj_by_id.id)
            if current_obj_by_key is not None:
                untouched_obj_ids.discard(current_obj_by_key.id)

        #Grab old objects
        old_session = old_session_maker()
        old_objs = old_session.query(OldCVTerm).filter(OldCVTerm.cv_no==7).all()

        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_experiment(old_obj)

            if newly_created_objs is not None:
                #Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    reconcile(newly_created_obj)

        #Get experiments from regulation files
        rows = break_up_file(regulation_file)
        experiment_names = set((row[4], row[5]) for row in rows)

        #NOTE(review): i is handed to the creator in set-iteration order, so which
        #name receives which index depends on that order - confirm this is intended.
        i=0
        for experiment_name, eco_id in experiment_names:
            newly_created_objs = create_experiment_from_reg_row(experiment_name, eco_id, i)
            for newly_created_obj in newly_created_objs:
                reconcile(newly_created_obj)
                i = i+1

        #Add experiments from binding files
        rows = break_up_file(binding_file, delimeter=';')
        for row in rows:
            if len(row) < 10:
                #Show the offending row; the column-9 access below then raises
                #IndexError, which aborts the run without committing.
                print(row)
        experiment_names = set(row[9][1:-1] for row in rows)

        i=0
        for experiment_name in experiment_names:
            newly_created_objs = create_experiment_from_binding_row(experiment_name, i)
            for newly_created_obj in newly_created_objs:
                reconcile(newly_created_obj)
                i = i+1

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            print('Removed at end')
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 11
0
def convert_protein(old_session_maker, new_session_maker):
    """Synchronise ``Protein`` rows in the new schema with old ``ProteinInfo`` rows.

    Every ``ProteinInfo`` row from the old database is passed to
    ``create_protein``; each object it returns is handed to
    ``create_or_update`` together with the matching current ``Protein`` row
    (looked up by id and by ``unique_key()``).  Current rows that were never
    matched are deleted, and everything is committed in one transaction.

    Any exception is logged and swallowed; both sessions are always closed.

    :param old_session_maker: zero-argument callable returning a session on
        the old database.
    :param new_session_maker: zero-argument callable returning a session on
        the new database.
    """
    from model_new_schema.bioentity import Bioentity as NewBioentity
    from model_new_schema.protein import Protein as NewProtein
    from model_old_schema.sequence import ProteinInfo as OldProteinInfo

    log = logging.getLogger('convert.bioentity.protein')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bind the session names up front: if a session maker raises, the
    # finally clause must not fail on an unbound local.
    new_session = None
    old_session = None
    try:
        # Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(NewProtein).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        # Attributes handed to create_or_update for comparison
        # ('link' was listed twice in the original; once is enough).
        values_to_check = ['display_name', 'link', 'source', 'status', 'date_created', 'created_by',
                           'locus_id', 'length', 'n_term_seq', 'c_term_seq']

        # Ids still in this set at the end were not produced by the conversion.
        untouched_obj_ids = set(id_to_current_obj.keys())

        # Grab old objects
        old_session = old_session_maker()
        old_objs = old_session.query(OldProteinInfo).all()

        # Grab cached dictionaries
        id_to_bioentity = dict((x.id, x) for x in new_session.query(NewBioentity).all())

        for old_obj in old_objs:
            # Convert old objects into new ones
            newly_created_objs = create_protein(old_obj, id_to_bioentity)
            if newly_created_objs is None:
                continue

            # Edit or add new objects
            for newly_created_obj in newly_created_objs:
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        # Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 12
0
def convert_interaction(new_session_maker, evidence_class, class_type, label, chunk_size, directed):
    """Rebuild ``Interaction`` rows of one ``class_type`` from evidence rows.

    The evidence table is scanned twice in id-ordered chunks of
    ``chunk_size``: first ``interaction_precomp`` fills a
    format-name -> evidence-count mapping, then every evidence row is passed
    to ``create_interaction`` and the results go through ``create_or_update``.
    Each unique key is processed at most once.  The session is committed
    after every chunk; current interactions that were never matched are
    deleted at the end.

    Any exception is logged and swallowed; the session is always closed.

    :param new_session_maker: zero-argument callable returning a session.
    :param evidence_class: mapped class whose rows are the evidence source.
    :param class_type: value matched against ``Interaction.class_type``.
    :param label: logger name.
    :param chunk_size: number of evidence ids covered per chunk.
    :param directed: selects ``create_directed_key`` versus
        ``create_undirected_interaction_format_name`` for the lookup key and
        is forwarded to the helper functions.
    """
    from model_new_schema.auxiliary import Interaction
    from model_new_schema.bioentity import Bioentity

    log = logging.getLogger(label)
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause never sees an unbound name.
    new_session = None
    try:
        new_session = new_session_maker()

        # Values to check
        values_to_check = ['display_name', 'bioentity1_id', 'bioentity2_id', 'evidence_count']

        # Grab all current objects
        current_objs = new_session.query(Interaction).filter(Interaction.class_type == class_type).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        # Grab cached dictionaries
        id_to_bioent = dict((x.id, x) for x in new_session.query(Bioentity).all())

        untouched_obj_ids = set(id_to_current_obj.keys())
        used_unique_keys = set()

        # Chunk layout, shared by both passes.  The id range is inclusive, so
        # it spans (max - min + 1) ids; without the +1 the row with the
        # highest id is skipped whenever (max - min) is a multiple of
        # chunk_size.  int() keeps range() happy where ceil returns a float.
        first_id = new_session.query(func.min(evidence_class.id)).first()[0]
        last_id = new_session.query(func.max(evidence_class.id)).first()[0]
        count = last_id - first_id + 1
        num_chunks = int(ceil(1.0*count/chunk_size))

        # Precomp evidence count
        format_name_to_evidence_count = {}
        min_id = first_id
        for i in range(0, num_chunks):
            more_old_objs = new_session.query(evidence_class).filter(evidence_class.id >= min_id).filter(evidence_class.id < min_id+chunk_size).all()
            interaction_precomp(format_name_to_evidence_count, more_old_objs, id_to_bioent, directed)
            min_id = min_id + chunk_size

        # Create interactions
        min_id = first_id
        for i in range(0, num_chunks):
            old_objs = new_session.query(evidence_class).filter(evidence_class.id >= min_id).filter(evidence_class.id < min_id+chunk_size).all()
            for old_obj in old_objs:
                # Convert old objects into new ones
                if directed:
                    format_name = create_directed_key(old_obj)
                else:
                    format_name = create_undirected_interaction_format_name(old_obj, id_to_bioent)
                evidence_count = format_name_to_evidence_count[format_name]
                newly_created_objs = create_interaction(old_obj, evidence_count, id_to_bioent, directed)
                if newly_created_objs is None:
                    continue

                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    unique_key = newly_created_obj.unique_key()
                    if unique_key in used_unique_keys:
                        # Already handled; its current row (if any) was
                        # marked as touched the first time round.
                        continue

                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(unique_key)
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                    used_unique_keys.add(unique_key)

                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

        # Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
def convert_url(new_session_maker, chunk_size):
    """Synchronise ``Referenceurl`` rows with the ``Reference`` table, chunk by chunk.

    References are walked in inclusive id windows
    ``[min_id, min_id + chunk_size]``.  For each window the existing
    ``Referenceurl`` rows whose ``reference_id`` falls in the window are
    loaded, every reference is passed to ``create_url`` and the results go
    through ``create_or_update``.  Rows of the window that were not matched
    are deleted, then the window is committed.

    Any exception is logged and swallowed; the session is always closed.

    :param new_session_maker: zero-argument callable returning a session.
    :param chunk_size: width of the reference-id window per chunk.
    """
    from model_new_schema.reference import Reference, Referenceurl as NewReferenceurl

    log = logging.getLogger('convert.reference_in_depth.reference_url')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause never sees an unbound name.
    new_session = None
    try:
        new_session = new_session_maker()

        # Values to check
        values_to_check = ['display_name', 'category', 'source', 'date_created', 'created_by', 'reference_id', 'url_type']

        count = new_session.query(func.max(Reference.id)).first()[0]
        # int() because ceil may return a float, which range() rejects.
        num_chunks = int(ceil(1.0*count/chunk_size))
        min_id = 0
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewReferenceurl).filter(NewReferenceurl.reference_id >= min_id).filter(NewReferenceurl.reference_id <=  min_id+chunk_size).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = new_session.query(Reference).filter(
                                            Reference.id >= min_id).filter(
                                            Reference.id <=  min_id+chunk_size).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_url(old_obj)
                if newly_created_objs is None:
                    continue

                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs.  This has to happen per chunk: the
            # lookup tables above are rebuilt on every iteration, so deleting
            # only after the loop would clean up the last chunk alone.
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()

            # Windows are inclusive on both ends, hence the extra +1.
            min_id = min_id + chunk_size + 1

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
Ejemplo n.º 14
0
def convert_evidence(old_session_maker, new_session_maker, chunk_size):
    """Synchronise ``Goevidence`` rows in the new schema with old ``GoRef`` rows.

    Old ``GoRef`` rows are processed in id-ordered chunks of ``chunk_size``.
    For each chunk the current ``Goevidence`` rows in the corresponding id
    range (translated with ``create_evidence_id``) are loaded, each old row is
    converted by ``create_evidence`` and the results go through
    ``create_or_update``.  A unique key is processed only once across the
    whole run.  Unmatched current rows of the chunk are deleted and the chunk
    is committed.

    Any exception is logged and swallowed; both sessions are always closed.

    :param old_session_maker: zero-argument callable returning a session on
        the old database.
    :param new_session_maker: zero-argument callable returning a session on
        the new database.
    :param chunk_size: number of old ids covered per chunk.
    """
    from model_new_schema.go import Goevidence as NewGoevidence
    from model_new_schema.reference import Reference as NewReference
    from model_new_schema.bioentity import Bioentity as NewBioentity
    from model_new_schema.go import Go as NewGo
    from model_old_schema.go import GoRef as OldGoRef

    log = logging.getLogger('convert.go.evidence')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause never sees an unbound name.
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        # Values to check
        values_to_check = ['experiment_id', 'reference_id', 'strain_id', 'source',
                           'go_evidence', 'annotation_type', 'date_last_reviewed', 'qualifier',
                           'bioentity_id', 'bioconcept_id', 'date_created', 'created_by']

        # Grab cached dictionaries
        bioent_ids = set(x.id for x in new_session.query(NewBioentity).all())
        reference_ids = set(x.id for x in new_session.query(NewReference).all())
        key_to_go = dict((x.unique_key(), x) for x in new_session.query(NewGo).all())

        already_used_keys = set()

        min_id = old_session.query(func.min(OldGoRef.id)).first()[0]
        max_id = old_session.query(func.max(OldGoRef.id)).first()[0]
        # The id range is inclusive, so it spans (max - min + 1) ids; without
        # the +1 the highest id is skipped whenever (max - min) is a multiple
        # of chunk_size.  int() keeps range() happy where ceil returns a float.
        count = max_id - min_id + 1
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewGoevidence).filter(NewGoevidence.id >= create_evidence_id(min_id)).filter(NewGoevidence.id < create_evidence_id(min_id+chunk_size)).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = old_session.query(OldGoRef).filter(
                                OldGoRef.id >= min_id).filter(
                                OldGoRef.id < min_id+chunk_size).options(
                                    joinedload('go_annotation')).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_evidence(old_obj, key_to_go, reference_ids, bioent_ids)
                if newly_created_objs is None:
                    continue

                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    key = newly_created_obj.unique_key()
                    if key in already_used_keys:
                        # Duplicate key: nothing to create or mark.
                        continue

                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(key)
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                    already_used_keys.add(key)

                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            # Commit
            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
def convert_bibentry(new_session_maker, chunk_size):
    """Synchronise ``Bibentry`` rows with the ``Reference`` table, chunk by chunk.

    Journals, books, abstracts, author names per reference and reftype names
    per reference are cached up front.  References are then walked in
    inclusive id windows ``[min_id, min_id + chunk_size]``; each reference is
    converted by ``create_bibentry`` and the results go through
    ``create_or_update`` (only ``text`` is compared).  ``Bibentry`` rows of
    the window that were not matched are deleted, then the window is
    committed.

    Any exception is logged and swallowed; the session is always closed.

    :param new_session_maker: zero-argument callable returning a session.
    :param chunk_size: width of the reference-id window per chunk.
    """
    from model_new_schema.reference import Reference as NewReference, Bibentry as NewBibentry, \
    Journal as NewJournal, Book as NewBook, Abstract as NewAbstract, \
    Reftype as NewReftype, Author as NewAuthor, AuthorReference as NewAuthorReference

    log = logging.getLogger('convert.reference_in_depth.bibentry')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause never sees an unbound name.
    new_session = None
    try:
        new_session = new_session_maker()

        # Values to check
        values_to_check = ['text']

        # Grab cached dictionaries
        id_to_journal = dict((x.id, x) for x in new_session.query(NewJournal).all())
        id_to_book = dict((x.id, x) for x in new_session.query(NewBook).all())
        id_to_abstract = dict((x.id, x.text) for x in new_session.query(NewAbstract).all())

        # reference id -> set of author display names
        id_to_authors = {}
        id_to_author = dict((x.id, x) for x in new_session.query(NewAuthor).all())
        for ar in new_session.query(NewAuthorReference).all():
            author_name = id_to_author[ar.author_id].display_name
            id_to_authors.setdefault(ar.reference_id, set()).add(author_name)

        # reference id -> set of reftype names.  The original seeded each new
        # set with the leftover author_name from the loop above instead of
        # the reftype name, so the first reftype of every reference was wrong.
        id_to_reftypes = {}
        for reftype in new_session.query(NewReftype).all():
            id_to_reftypes.setdefault(reftype.reference_id, set()).add(reftype.name)

        count = new_session.query(func.max(NewReference.id)).first()[0]
        # int() because ceil may return a float, which range() rejects.
        num_chunks = int(ceil(1.0*count/chunk_size))
        min_id = 0
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewBibentry).filter(NewBibentry.id >= min_id).filter(NewBibentry.id <=  min_id+chunk_size).all()

            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = new_session.query(NewReference).filter(
                                            NewReference.id >= min_id).filter(
                                            NewReference.id <=  min_id+chunk_size).options(joinedload('author_references')).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_bibentry(old_obj, id_to_journal, id_to_book, id_to_abstract, id_to_reftypes, id_to_authors)
                if newly_created_objs is None:
                    continue

                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs.  This has to happen per chunk: the
            # lookup tables above are rebuilt on every iteration, so deleting
            # only after the loop would clean up the last chunk alone.
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()

            # Windows are inclusive on both ends, hence the extra +1.
            min_id = min_id + chunk_size + 1

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
Ejemplo n.º 16
0
def convert_evidence(old_session_maker, new_session_maker, chunk_size):
    """Synchronise ``Phenotypeevidence`` rows with old ``PhenotypeFeature`` rows.

    Experiments, phenotypes, strains and alleles (keyed by ``unique_key()``),
    bioentity and reference ids, and old ``Reflink`` rows (keyed by
    ``(col_name, primary_key)``) are cached up front.  Old
    ``PhenotypeFeature`` rows are then processed in id-ordered chunks of
    ``chunk_size``: each row is converted by ``create_evidence`` and the
    results go through ``create_or_update``.  Unmatched current rows of the
    chunk's id range (translated with ``create_evidence_id``) are deleted and
    the chunk is committed.

    Any exception is logged and swallowed; both sessions are always closed.

    :param old_session_maker: zero-argument callable returning a session on
        the old database.
    :param new_session_maker: zero-argument callable returning a session on
        the new database.
    :param chunk_size: number of old ids covered per chunk.
    """
    from model_new_schema.phenotype import Phenotypeevidence as NewPhenotypeevidence
    from model_new_schema.reference import Reference as NewReference
    from model_new_schema.evelement import Experiment as NewExperiment, Strain as NewStrain
    from model_new_schema.bioentity import Bioentity as NewBioentity
    from model_new_schema.misc import Allele as NewAllele
    from model_new_schema.phenotype import Phenotype as NewPhenotype
    from model_old_schema.reference import Reflink as OldReflink
    from model_old_schema.phenotype import PhenotypeFeature as OldPhenotypeFeature

    log = logging.getLogger('convert.phenotype.evidence')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause never sees an unbound name.
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        # Values to check
        values_to_check = ['experiment_id', 'reference_id', 'strain_id', 'source',
                           'bioentity_id', 'bioconcept_id', 'date_created', 'created_by',
                           'reporter', 'reporter_desc', 'strain_details',
                           'conditions', 'details', 'experiment_details', 'allele_info', 'allele_id']

        # Grab cached dictionaries
        key_to_experiment = dict((x.unique_key(), x) for x in new_session.query(NewExperiment).all())
        key_to_phenotype = dict((x.unique_key(), x) for x in new_session.query(NewPhenotype).all())
        key_to_strain = dict((x.unique_key(), x) for x in new_session.query(NewStrain).all())
        key_to_allele = dict((x.unique_key(), x) for x in new_session.query(NewAllele).all())
        bioent_ids = set(x.id for x in new_session.query(NewBioentity).all())
        reference_ids = set(x.id for x in new_session.query(NewReference).all())

        old_reflinks = old_session.query(OldReflink).all()
        key_to_reflink = dict(((x.col_name, x.primary_key), x) for x in old_reflinks)

        min_id = old_session.query(func.min(OldPhenotypeFeature.id)).first()[0]
        max_id = old_session.query(func.max(OldPhenotypeFeature.id)).first()[0]
        # The id range is inclusive, so it spans (max - min + 1) ids; without
        # the +1 the highest id is skipped whenever (max - min) is a multiple
        # of chunk_size.  int() keeps range() happy where ceil returns a float.
        count = max_id - min_id + 1
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewPhenotypeevidence).filter(NewPhenotypeevidence.id >= create_evidence_id(min_id)).filter(NewPhenotypeevidence.id < create_evidence_id(min_id+chunk_size)).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = old_session.query(OldPhenotypeFeature).filter(
                                OldPhenotypeFeature.id >= min_id).filter(
                                OldPhenotypeFeature.id < min_id+chunk_size).options(
                                        joinedload('experiment')).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_evidence(old_obj, key_to_reflink, key_to_phenotype, reference_ids,
                                                     bioent_ids, key_to_strain, key_to_experiment, key_to_allele)
                if newly_created_objs is None:
                    continue

                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            # Commit
            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 17
0
def convert_litevidence(old_session_maker, new_session_maker, chunk_size):
    """Synchronise ``Literatureevidence`` rows with old ``LitguideFeat`` rows.

    Old ``LitguideFeat`` rows are processed in id-ordered chunks of
    ``chunk_size``; only rows whose ``topic`` is 'Additional Literature',
    'Primary Literature', 'Omics' or 'Reviews' are converted.  Each row goes
    through ``create_litevidence`` and the results through
    ``create_or_update``.  Unmatched current rows of the chunk's id range
    (translated with ``create_litevidence_id``) are deleted and the chunk is
    committed.

    Any exception is logged and swallowed; both sessions are always closed.

    :param old_session_maker: zero-argument callable returning a session on
        the old database.
    :param new_session_maker: zero-argument callable returning a session on
        the new database.
    :param chunk_size: number of old ids covered per chunk.
    """
    from model_new_schema.literature import Literatureevidence as NewLiteratureevidence
    from model_new_schema.reference import Reference as NewReference
    from model_new_schema.bioentity import Bioentity as NewBioentity
    from model_old_schema.reference import LitguideFeat as OldLitguideFeat

    log = logging.getLogger('convert.literature.evidence')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause never sees an unbound name.
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        # Values to check
        values_to_check = ['experiment_id', 'reference_id', 'class_type', 'strain_id',
                           'source', 'topic', 'bioentity_id', 'date_created', 'created_by']

        # Grab cached dictionaries
        bioent_ids = set(x.id for x in new_session.query(NewBioentity).all())
        reference_ids = set(x.id for x in new_session.query(NewReference).all())

        min_id = old_session.query(func.min(OldLitguideFeat.id)).first()[0]
        max_id = old_session.query(func.max(OldLitguideFeat.id)).first()[0]
        # The id range is inclusive, so it spans (max - min + 1) ids; without
        # the +1 the highest id is skipped whenever (max - min) is a multiple
        # of chunk_size.  int() keeps range() happy where ceil returns a float.
        count = max_id - min_id + 1
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewLiteratureevidence).filter(NewLiteratureevidence.id >= create_litevidence_id(min_id)).filter(NewLiteratureevidence.id < create_litevidence_id(min_id+chunk_size)).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = old_session.query(OldLitguideFeat).filter(
                                                OldLitguideFeat.id >= min_id).filter(
                                                OldLitguideFeat.id < min_id+chunk_size).filter(
                                                or_(OldLitguideFeat.topic=='Additional Literature',
                                                    OldLitguideFeat.topic=='Primary Literature',
                                                    OldLitguideFeat.topic=='Omics',
                                                    OldLitguideFeat.topic=='Reviews')).options(
                                                joinedload('litguide')).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_litevidence(old_obj, reference_ids, bioent_ids)
                if newly_created_objs is None:
                    continue

                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            # Commit
            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 18
0
def convert_domain(new_session_maker, chunk_size):
    """Synchronise ``Domain`` rows with two flat files.

    Rows of ``yeastmine_protein_domains.tsv`` are processed in slices of
    ``chunk_size``: each row is converted by ``create_domain`` and the results
    go through ``create_or_update`` (a unique key is handled only once in
    this pass), with a commit after every slice.  Rows of
    ``TF_family_class_accession04302013.txt`` are then converted by
    ``create_domain_from_tf_file`` and processed the same way, without the
    duplicate-key filter.  Finally, current ``Domain`` rows that were never
    matched are deleted.

    Any exception is logged and swallowed; the session is always closed.

    :param new_session_maker: zero-argument callable returning a session.
    :param chunk_size: number of file rows handled per commit.
    """
    from model_new_schema.protein import Domain as Domain

    log = logging.getLogger('convert.protein.domain')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause never sees an unbound name.
    new_session = None
    try:
        # Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(Domain).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        # Values to check
        values_to_check = ['display_name', 'description', 'interpro_id', 'interpro_description', 'link']

        untouched_obj_ids = set(id_to_current_obj.keys())

        # Grab old objects
        data = break_up_file('/Users/kpaskov/final/yeastmine_protein_domains.tsv')

        used_unique_keys = set()

        min_id = 0
        count = len(data)
        # int() because ceil may return a float, which range() rejects.
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(0, num_chunks):
            old_objs = data[min_id:min_id+chunk_size]
            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_domain(old_obj)
                if newly_created_objs is None:
                    continue

                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    unique_key = newly_created_obj.unique_key()
                    if unique_key in used_unique_keys:
                        # Already handled; its current row (if any) was
                        # marked as touched the first time round.
                        continue

                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(unique_key)
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                    used_unique_keys.add(unique_key)

                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

        # Grab JASPAR domains from file
        old_objs = break_up_file('/Users/kpaskov/final/TF_family_class_accession04302013.txt')
        for old_obj in old_objs:
            # Convert old objects into new ones
            newly_created_objs = create_domain_from_tf_file(old_obj)
            if newly_created_objs is None:
                continue

            # Edit or add new objects
            # NOTE(review): unlike the pass above, keys already in
            # used_unique_keys are not skipped here - confirm this is intended.
            for newly_created_obj in newly_created_objs:
                unique_key = newly_created_obj.unique_key()
                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(unique_key)
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                used_unique_keys.add(unique_key)

                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        output_creator.finished("1/1")
        new_session.commit()

        # Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
Ejemplo n.º 19
0
def convert_domain_evidence(new_session_maker, chunk_size):
    """Synchronise Domainevidence rows with the two domain source files.

    Rows of the yeastmine protein-domain file are processed in slices of
    ``chunk_size`` with one commit per slice; the TF family file is then
    processed in a single pass.  Every converted object is handed to
    ``create_or_update`` together with the existing rows that match it by id
    and by unique key.  Existing rows that were never matched are deleted at
    the end.

    Any exception is logged and swallowed; the session is always closed.

    new_session_maker -- callable returning a session used for all queries
    chunk_size -- number of file rows handled between commits
    """
    from model_new_schema.protein import Domain, Domainevidence
    from model_new_schema.bioentity import Bioentity
    from model_new_schema.reference import Reference

    log = logging.getLogger('convert.protein.domain_evidence')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so the finally clause never trips over an
    # unbound name when new_session_maker() itself raises.
    new_session = None
    try:
        # Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(Domainevidence).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        # Values to check
        values_to_check = ['reference_id', 'strain_id', 'source', 'date_created', 'created_by',
                           'start', 'end', 'evalue', 'status', 'date_of_run', 'protein_id', 'domain_id']

        # Grab cached dictionaries
        key_to_bioentity = dict((x.unique_key(), x) for x in new_session.query(Bioentity).all())
        key_to_domain = dict((x.unique_key(), x) for x in new_session.query(Domain).all())
        pubmed_id_to_reference_id = dict((x.pubmed_id, x.id) for x in new_session.query(Reference).all())

        untouched_obj_ids = set(id_to_current_obj.keys())

        # Grab old objects
        data = break_up_file('/Users/kpaskov/final/yeastmine_protein_domains.tsv')

        used_unique_keys = set()

        # j counts source rows across both files and is passed to the creators.
        j = 0
        min_id = 0
        count = len(data)
        # range() needs an int; ceil() may hand back a float.
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(0, num_chunks):
            old_objs = data[min_id:min_id+chunk_size]
            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_domain_evidence(old_obj, j, key_to_bioentity, key_to_domain)

                if newly_created_objs is not None:
                    # Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        unique_key = (newly_created_obj.protein_id, newly_created_obj.domain_id, newly_created_obj.start,
                                      newly_created_obj.end, newly_created_obj.evalue)
                        if unique_key not in used_unique_keys:
                            current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                            # key_to_current_obj is keyed by unique_key(), so it
                            # must be probed with unique_key(), not with the id.
                            current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                            create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                            used_unique_keys.add(unique_key)

                            # Matched rows are still in use and must survive the
                            # final clean-up.
                            if current_obj_by_id is not None:
                                untouched_obj_ids.discard(current_obj_by_id.id)
                            if current_obj_by_key is not None:
                                untouched_obj_ids.discard(current_obj_by_key.id)
                j = j+1

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

        # Grab JASPAR evidence from file
        old_objs = break_up_file('/Users/kpaskov/final/TF_family_class_accession04302013.txt')
        for old_obj in old_objs:
            # Convert old objects into new ones
            newly_created_objs = create_domain_evidence_from_tf_file(old_obj, j, key_to_bioentity, key_to_domain, pubmed_id_to_reference_id)

            if newly_created_objs is not None:
                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    unique_key = newly_created_obj.unique_key()
                    if unique_key not in used_unique_keys:
                        current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                        current_obj_by_key = key_to_current_obj.get(unique_key)
                        create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                        used_unique_keys.add(unique_key)

                        if current_obj_by_id is not None:
                            untouched_obj_ids.discard(current_obj_by_id.id)
                        if current_obj_by_key is not None:
                            untouched_obj_ids.discard(current_obj_by_key.id)
            j = j+1

        output_creator.finished("1/1")
        new_session.commit()

        # Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
Ejemplo n.º 20
0
def convert_bioentity_reference(new_session_maker, evidence_class, class_type, label, chunk_size, get_bioent_ids_f, 
                                filter_f=None):
    """Rebuild the BioentityReference rows of one ``class_type``.

    Rows of ``evidence_class`` are read in id windows of ``chunk_size`` (one
    commit per window) and converted with ``create_bioentity_reference``;
    afterwards Paragraph rows of the same ``class_type`` are converted with
    ``create_bioentity_reference_from_paragraph``.  Each converted object is
    handed to ``create_or_update`` once per unique key.  Existing
    BioentityReference rows of this ``class_type`` that were never matched are
    deleted at the end.

    Any exception is logged and swallowed; the session is always closed.

    new_session_maker -- callable returning a session used for all queries
    evidence_class -- mapped class whose rows are converted
    class_type -- value filtered on for BioentityReference and Paragraph
    label -- logger name
    chunk_size -- width of the id window handled between commits
    get_bioent_ids_f -- passed through to ``create_bioentity_reference``
    filter_f -- optional predicate; source rows for which it is falsy are skipped
    """
    from model_new_schema.auxiliary import BioentityReference
    from model_new_schema.bioentity import Paragraph

    log = logging.getLogger(label)
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so the finally clause never trips over an
    # unbound name when new_session_maker() itself raises.
    new_session = None
    try:
        new_session = new_session_maker()

        # Values to check
        values_to_check = []

        # Grab all current objects
        current_objs = new_session.query(BioentityReference).filter(BioentityReference.class_type == class_type).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        untouched_obj_ids = set(id_to_current_obj.keys())

        used_unique_keys = set()

        min_id = new_session.query(func.min(evidence_class.id)).first()[0]
        max_id = new_session.query(func.max(evidence_class.id)).first()[0]
        # Ids span [min_id, max_id] inclusive, hence the +1: without it a table
        # holding a single row would produce zero chunks.
        count = max_id - min_id + 1
        # range() needs an int; ceil() may hand back a float.
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(0, num_chunks):
            # Half-open window so adjacent chunks do not both fetch the boundary id.
            old_objs = new_session.query(evidence_class).filter(evidence_class.id >= min_id, evidence_class.id < min_id+chunk_size).all()

            for old_obj in old_objs:
                if filter_f is None or filter_f(old_obj):
                    # Convert old objects into new ones
                    newly_created_objs = create_bioentity_reference(old_obj, get_bioent_ids_f, class_type)

                    if newly_created_objs is not None:
                        # Edit or add new objects
                        for newly_created_obj in newly_created_objs:
                            unique_key = newly_created_obj.unique_key()
                            if unique_key not in used_unique_keys:
                                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                                current_obj_by_key = key_to_current_obj.get(unique_key)
                                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                                used_unique_keys.add(unique_key)

                                # Matched rows are still in use and must survive
                                # the final clean-up.
                                if current_obj_by_id is not None:
                                    untouched_obj_ids.discard(current_obj_by_id.id)
                                if current_obj_by_key is not None:
                                    untouched_obj_ids.discard(current_obj_by_key.id)

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

        # Add paragraph-related bioent_references.
        old_objs = new_session.query(Paragraph).filter(Paragraph.class_type == class_type).options(joinedload('paragraph_references')).all()
        for old_obj in old_objs:
            if filter_f is None or filter_f(old_obj):
                # Convert old objects into new ones
                newly_created_objs = create_bioentity_reference_from_paragraph(old_obj, class_type)

                if newly_created_objs is not None:
                    # Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        unique_key = newly_created_obj.unique_key()
                        if unique_key not in used_unique_keys:
                            current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                            current_obj_by_key = key_to_current_obj.get(unique_key)
                            create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                            used_unique_keys.add(unique_key)

                            if current_obj_by_id is not None:
                                untouched_obj_ids.discard(current_obj_by_id.id)
                            if current_obj_by_key is not None:
                                untouched_obj_ids.discard(current_obj_by_key.id)

        # Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
    
    
def convert_abstract(old_session_maker, new_session_maker, chunk_size):
    """Synchronise Abstract rows with the abstracts of the old references.

    Old references are read in id windows; for each window the existing
    Abstract rows in the same id window are loaded, every converted object is
    handed to ``create_or_update``, existing rows of that window that were
    never matched are deleted, and the window is committed.

    Any exception is logged and swallowed; both sessions are always closed.

    old_session_maker -- callable returning a session for the old-schema queries
    new_session_maker -- callable returning a session for the new-schema queries
    chunk_size -- each id window covers ``chunk_size + 1`` consecutive ids
    """
    from model_new_schema.reference import Reference as NewReference, Abstract as NewAbstract
    from model_old_schema.reference import Reference as OldReference

    log = logging.getLogger('convert.reference_in_depth.abstract')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so the finally clause never trips over an
    # unbound name when one of the session makers raises.
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        # Values to check
        values_to_check = ['text']

        # Grab cached dictionaries
        reference_ids = set(x.id for x in new_session.query(NewReference).all())

        count = old_session.query(func.max(OldReference.id)).first()[0]
        # range() needs an int; ceil() may hand back a float.
        num_chunks = int(ceil(1.0*count/chunk_size))
        min_id = 0
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewAbstract).filter(NewAbstract.id >= min_id).filter(NewAbstract.id <= min_id+chunk_size).all()

            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = old_session.query(OldReference).filter(
                                            OldReference.id >= min_id).filter(
                                            OldReference.id <= min_id+chunk_size).options(
                                            joinedload('abst')).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_abstract(old_obj, reference_ids)

                if newly_created_objs is not None:
                    # Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                        current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                        create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                        # Matched rows are still in use and must survive the clean-up.
                        if current_obj_by_id is not None:
                            untouched_obj_ids.discard(current_obj_by_id.id)
                        if current_obj_by_key is not None:
                            untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs.  This has to happen per window: the lookup
            # tables are rebuilt for every window, so deleting only after the
            # loop would clean up the last window alone.
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()

            # Window bounds are inclusive, so the next one starts one id further on.
            min_id = min_id + chunk_size + 1

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 22
0
def convert_reference(old_session_maker, new_session_maker, chunk_size):
    """Synchronise new-schema Reference rows with the old-schema references.

    Old references are read in id windows; for each window the existing
    Reference rows in the same id window are loaded, PubMed Central ids are
    fetched for the window's PubMed ids, and every converted object is handed
    to ``create_or_update`` once per unique key.  Existing rows of the window
    that were never matched are deleted before the window is committed.

    Any exception is logged and swallowed; both sessions are always closed.

    old_session_maker -- callable returning a session for the old-schema queries
    new_session_maker -- callable returning a session for the new-schema queries
    chunk_size -- each id window covers ``chunk_size + 1`` consecutive ids
    """
    from model_new_schema.reference import Reference as NewReference, Book as NewBook, Journal as NewJournal
    from model_old_schema.reference import Reference as OldReference

    log = logging.getLogger('convert.reference.reference')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so the finally clause never trips over an
    # unbound name when a failure happens before a session exists.
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()

        # Values to check
        values_to_check = ['display_name', 'format_name', 'link', 'source', 
                       'status', 'pubmed_id', 'pubmed_central_id', 'pdf_status', 'year', 'date_published', 
                       'date_revised', 'issue', 'page', 'volume', 'title',
                       'journal_id', 'book_id', 'doi',
                       'created_by', 'date_created']

        # Grab cached dictionaries
        key_to_journal = dict((x.unique_key(), x) for x in new_session.query(NewJournal).all())
        key_to_book = dict((x.unique_key(), x) for x in new_session.query(NewBook).all())

        # Grab old objects
        old_session = old_session_maker()

        used_unique_keys = set()

        count = old_session.query(func.max(OldReference.id)).first()[0]
        # range() needs an int; ceil() may hand back a float.
        num_chunks = int(ceil(1.0*count/chunk_size))
        min_id = 0
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewReference).filter(NewReference.id >= min_id).filter(NewReference.id <= min_id+chunk_size).all()

            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = old_session.query(OldReference).filter(
                                            OldReference.id >= min_id).filter(
                                            OldReference.id <= min_id+chunk_size).options(
                                            joinedload('book'), 
                                            joinedload('journal')).all()

            old_pubmed_ids = [x.pubmed_id for x in old_objs if x.pubmed_id is not None]
            pubmed_id_to_pubmed_central_id = get_pubmed_central_ids(old_pubmed_ids)

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_reference(old_obj, key_to_journal, key_to_book, pubmed_id_to_pubmed_central_id)

                if newly_created_objs is not None:
                    # Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        unique_key = newly_created_obj.unique_key()
                        if unique_key not in used_unique_keys:
                            current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                            current_obj_by_key = key_to_current_obj.get(unique_key)

                            create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                            # Matched rows are still in use and must survive the clean-up.
                            if current_obj_by_id is not None:
                                untouched_obj_ids.discard(current_obj_by_id.id)
                            if current_obj_by_key is not None:
                                untouched_obj_ids.discard(current_obj_by_key.id)
                            used_unique_keys.add(unique_key)

            # Delete untouched objs.  This has to happen per window: the lookup
            # tables are rebuilt for every window, so deleting only after the
            # loop would clean up the last window alone.
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()

            # Window bounds are inclusive, so the next one starts one id further on.
            min_id = min_id + chunk_size + 1

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
def convert_url(old_session_maker, new_session_maker, chunk_size):
    """Synchronise Bioentityurl rows with the old feature URLs and dbxref URLs.

    Only URLs whose WebDisplay row has ``label_location`` equal to
    'Interaction Resources' are passed to the creators.  Work is done in
    windows of ``chunk_size`` bioentity / feature ids: existing Bioentityurl
    rows of the window are loaded, FeatUrl and DbxrefFeat rows of the same
    window are converted and handed to ``create_or_update``, existing rows
    that were never matched are deleted, and the window is committed.

    Any exception is logged and swallowed; both sessions are always closed.

    old_session_maker -- callable returning a session for the old-schema queries
    new_session_maker -- callable returning a session for the new-schema queries
    chunk_size -- width of the id window handled between commits
    """
    from model_new_schema.bioentity import Bioentity as NewBioentity, Bioentityurl as NewBioentityurl
    from model_old_schema.general import WebDisplay as OldWebDisplay, FeatUrl as OldFeatUrl, DbxrefFeat as OldDbxrefFeat

    log = logging.getLogger('convert.bioentity_in_depth.bioentity_url')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so the finally clause never trips over an
    # unbound name when one of the session makers raises.
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        # Values to check
        values_to_check = ['display_name', 'source', 'created_by', 'date_created']

        # Grab cached dictionaries
        id_to_bioentity = dict((x.id, x) for x in new_session.query(NewBioentity).all())

        # Urls of interest
        old_web_displays = old_session.query(OldWebDisplay).filter(OldWebDisplay.label_location == 'Interaction Resources').all()
        url_to_display = dict((x.url_id, x) for x in old_web_displays)

        # Windows are half-open and start at 0, so max id + 1 ids have to be
        # covered; without the +1 the highest id is skipped whenever it is a
        # multiple of chunk_size.
        count = max(id_to_bioentity.keys()) + 1
        # range() needs an int; ceil() may hand back a float.
        num_chunks = int(ceil(1.0*count/chunk_size))
        min_id = 0
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewBioentityurl).filter(NewBioentityurl.bioentity_id >= min_id).filter(NewBioentityurl.bioentity_id < min_id+chunk_size).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = old_session.query(OldFeatUrl).filter(OldFeatUrl.feature_id >= min_id).filter(OldFeatUrl.feature_id < min_id+chunk_size).options(joinedload('url')).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                if old_obj.url_id in url_to_display:
                    newly_created_objs = create_url(old_obj, url_to_display[old_obj.url_id], id_to_bioentity)

                    if newly_created_objs is not None:
                        # Edit or add new objects
                        for newly_created_obj in newly_created_objs:
                            current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                            current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                            create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                            # Matched rows are still in use and must survive the clean-up.
                            if current_obj_by_id is not None:
                                untouched_obj_ids.discard(current_obj_by_id.id)
                            if current_obj_by_key is not None:
                                untouched_obj_ids.discard(current_obj_by_key.id)

            # Grab old objects (dbxref)
            old_objs = old_session.query(OldDbxrefFeat).filter(
                                            OldDbxrefFeat.feature_id >= min_id).filter(
                                            OldDbxrefFeat.feature_id < min_id+chunk_size).options(
                                                            joinedload('dbxref'), joinedload('dbxref.dbxref_urls')).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_url_from_dbxref(old_obj, url_to_display, id_to_bioentity)

                if newly_created_objs is not None:
                    # Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                        current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                        create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                        if current_obj_by_id is not None:
                            untouched_obj_ids.discard(current_obj_by_id.id)
                        if current_obj_by_key is not None:
                            untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id + chunk_size

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 24
0
def convert_experiment_alias(old_session_maker, new_session_maker):
    """Synchronise Experimentalias rows with the old CV terms where cv_no is 7.

    Every object produced by ``create_experiment_alias`` is handed to
    ``create_or_update`` together with the existing rows matching it by id and
    by unique key.  Existing rows that were never matched are deleted, then
    everything is committed in one go.

    Any exception is logged and swallowed; both sessions are always closed.

    old_session_maker -- callable returning a session for the old-schema queries
    new_session_maker -- callable returning a session for the new-schema queries
    """
    from model_new_schema.evelement import Experiment as NewExperiment, Experimentalias as NewExperimentalias
    from model_old_schema.cv import CVTerm as OldCVTerm

    log = logging.getLogger('convert.evelements.experiment_alias')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block: old_session is only created part-way through,
    # so a failure before that point used to raise NameError from the finally
    # clause.
    new_session = None
    old_session = None
    try:
        # Grab all current objects
        new_session = new_session_maker()
        current_objs = new_session.query(NewExperimentalias).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        # Values to check
        values_to_check = ['source', 'category', 'date_created', 'created_by']

        untouched_obj_ids = set(id_to_current_obj.keys())

        # Grab cached dictionaries
        key_to_experiment = dict((x.unique_key(), x) for x in new_session.query(NewExperiment).all())

        # Grab old objects
        old_session = old_session_maker()
        old_objs = old_session.query(OldCVTerm).filter(OldCVTerm.cv_no==7).options(
                                                    joinedload('cv_dbxrefs'), 
                                                    joinedload('cv_dbxrefs.dbxref')).all()

        for old_obj in old_objs:
            # Convert old objects into new ones
            newly_created_objs = create_experiment_alias(old_obj, key_to_experiment)

            if newly_created_objs is not None:
                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                    # Matched rows are still in use and must survive the clean-up.
                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

        # Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        # Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 25
0
def convert_evidence_chemical(old_session_maker, new_session_maker, chunk_size):
    """Synchronise EvidenceChemical rows with the old PhenotypeFeature rows.

    Old PhenotypeFeature rows are read in id windows of ``chunk_size``.  For
    each window the existing EvidenceChemical and Phenotypeevidence rows whose
    ids fall between ``create_evidence_id`` of the window bounds are loaded,
    every converted object is handed to ``create_or_update``, existing rows
    that were never matched are deleted, and the window is committed.

    Any exception is logged and swallowed; both sessions are always closed.

    old_session_maker -- callable returning a session for the old-schema queries
    new_session_maker -- callable returning a session for the new-schema queries
    chunk_size -- width of the id window handled between commits
    """
    from model_new_schema.phenotype import Phenotypeevidence as NewPhenotypeevidence
    from model_new_schema.chemical import Chemical as NewChemical
    from model_new_schema.evidence import EvidenceChemical as NewEvidenceChemical
    from model_old_schema.phenotype import PhenotypeFeature as OldPhenotypeFeature

    log = logging.getLogger('convert.phenotype.evidence_chemical')
    log.info('begin')
    output_creator = OutputCreator(log)

    # Bound before the try block so the finally clause never trips over an
    # unbound name when one of the session makers raises.
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        # Values to check
        values_to_check = ['chemical_amt']

        # Grab cached dictionaries
        key_to_chemical = dict((x.unique_key(), x) for x in new_session.query(NewChemical).all())

        min_id = old_session.query(func.min(OldPhenotypeFeature.id)).first()[0]
        max_id = old_session.query(func.max(OldPhenotypeFeature.id)).first()[0]
        # Ids span [min_id, max_id] inclusive while the windows below are
        # half-open, hence the +1: without it the row with the highest id is
        # skipped whenever the span is a multiple of chunk_size.
        count = max_id - min_id + 1
        # range() needs an int; ceil() may hand back a float.
        num_chunks = int(ceil(1.0*count/chunk_size))
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = new_session.query(NewEvidenceChemical).filter(NewEvidenceChemical.evidence_id >= create_evidence_id(min_id)).filter(NewEvidenceChemical.evidence_id < create_evidence_id(min_id+chunk_size)).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            id_to_evidence = dict((x.id, x) for x in new_session.query(NewPhenotypeevidence).filter(NewPhenotypeevidence.id >= create_evidence_id(min_id)).filter(NewPhenotypeevidence.id < create_evidence_id(min_id+chunk_size)).all())

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = old_session.query(OldPhenotypeFeature).filter(
                                OldPhenotypeFeature.id >= min_id).filter(
                                OldPhenotypeFeature.id < min_id+chunk_size).options(
                                        joinedload('experiment')).all()

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_evidence_chemical(old_obj, key_to_chemical, id_to_evidence)

                if newly_created_objs is not None:
                    # Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                        current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                        create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                        # Matched rows are still in use and must survive the clean-up.
                        if current_obj_by_id is not None:
                            untouched_obj_ids.discard(current_obj_by_id.id)
                        if current_obj_by_key is not None:
                            untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            # Commit
            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = min_id+chunk_size

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
Ejemplo n.º 26
0
def convert_interaction_family(new_session_maker, chunk_size):
    """Synchronise InteractionFamily rows with the interactions in the new schema.

    Physical and genetic Interaction rows are loaded once and handed to
    interaction_family_precomp. Bioentity ids are then walked in windows of
    chunk_size starting at id 0. For every window the existing
    InteractionFamily rows are loaded, the objects returned by
    create_interaction_family are passed to create_or_update, existing rows
    that were matched neither by id nor by unique key are deleted, and the
    session is committed.

    Any exception is logged and swallowed; the session is closed in all cases.

    new_session_maker -- callable returning a session bound to the new schema
    chunk_size -- width, in Bioentity ids, of each processing window
    """
    from model_new_schema.auxiliary import Interaction, InteractionFamily
    from model_new_schema.bioentity import Bioentity

    log = logging.getLogger('convert.interaction.interaction_family')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause still works if the session maker raises
    new_session = None
    try:
        new_session = new_session_maker()

        #Values to check
        values_to_check = ['bioentity1_id', 'bioentity2_id', 'genetic_ev_count', 'physical_ev_count', 'evidence_count']

        #Grab cached dictionaries
        id_to_bioent = dict((x.id, x) for x in new_session.query(Bioentity).all())

        #Grab old objs
        interactions = new_session.query(Interaction).filter(
            or_(Interaction.class_type == 'PHYSINTERACTION',
                Interaction.class_type == 'GENINTERACTION')).all()

        #NOTE(review): the literal 100 is forwarded unchanged to interaction_family_precomp;
        #its meaning is defined there - confirm before changing it
        bioent_id_to_evidence_cutoff, bioent_id_to_neighbor_ids, edge_to_counts = interaction_family_precomp(interactions, 100, id_to_bioent)

        min_id = 0
        max_id = new_session.query(func.max(Bioentity.id)).first()[0]
        #Windows start at id 0, so max_id + 1 ids have to be covered; sizing the loop from
        #max_id alone skipped the last bioentity whenever max_id was a multiple of chunk_size.
        #int() because ceil may return a float, which range() does not accept.
        num_chunks = int(ceil(1.0 * (max_id + 1) / chunk_size))
        for i in range(0, num_chunks):
            upper_id = min_id + chunk_size

            #Grab all current objects
            current_objs = new_session.query(InteractionFamily).filter(
                InteractionFamily.bioentity_id >= min_id).filter(
                InteractionFamily.bioentity_id < upper_id).all()
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            old_objs = new_session.query(Bioentity).filter(Bioentity.id >= min_id).filter(Bioentity.id < upper_id).all()
            for old_obj in old_objs:
                #Convert old objects into new ones
                evidence_cutoff = bioent_id_to_evidence_cutoff[old_obj.id]
                newly_created_objs = create_interaction_family(old_obj, evidence_cutoff,
                                    bioent_id_to_neighbor_ids, edge_to_counts, id_to_bioent)

                if newly_created_objs is None:
                    continue

                #Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                    create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                    #Anything matched by id or by key is still in use and must survive the delete pass
                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            #Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            #Commit
            output_creator.finished(str(i+1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = upper_id

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()

    log.info('complete')
# Example no. 27
# 0
def convert_allele(old_session_maker, new_session_maker):
    """Synchronise Allele rows in the new schema with old PhenotypeFeature rows.

    All existing Allele rows and all old PhenotypeFeature rows are loaded in
    one go. Each object returned by create_allele is passed to
    create_or_update the first time its unique key is seen; later objects
    with the same key are ignored. Existing Allele rows that were matched
    neither by id nor by unique key are deleted, then the session is
    committed.

    Any exception is logged and swallowed; both sessions are closed in all
    cases.

    old_session_maker -- callable returning a session bound to the old schema
    new_session_maker -- callable returning a session bound to the new schema
    """
    from model_new_schema.phenotype import Allele as NewAllele
    from model_old_schema.phenotype import PhenotypeFeature as OldPhenotypeFeature

    log = logging.getLogger('convert.chemical.allele')
    log.info('begin')
    output_creator = OutputCreator(log)

    #Bound before the try so the finally clause still works if a session maker raises
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        #Grab all current objects
        current_objs = new_session.query(NewAllele).all()
        id_to_current_obj = dict((x.id, x) for x in current_objs)
        key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

        #Values to check
        values_to_check = ['description', 'display_name']

        untouched_obj_ids = set(id_to_current_obj.keys())

        keys_already_seen = set()

        #Grab old objects
        old_objs = old_session.query(OldPhenotypeFeature).all()

        for old_obj in old_objs:
            #Convert old objects into new ones
            newly_created_objs = create_allele(old_obj)

            if newly_created_objs is None:
                continue

            #Edit or add new objects
            for newly_created_obj in newly_created_objs:
                key = newly_created_obj.unique_key()
                if key in keys_already_seen:
                    #Duplicate of an allele handled earlier in this run; nothing left to do
                    continue

                current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                current_obj_by_key = key_to_current_obj.get(key)
                create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)
                keys_already_seen.add(key)

                #Bookkeeping lives in the same branch as the lookups so it can never read
                #values left over from a previous iteration
                if current_obj_by_id is not None:
                    untouched_obj_ids.discard(current_obj_by_id.id)
                if current_obj_by_key is not None:
                    untouched_obj_ids.discard(current_obj_by_key.id)

        #Delete untouched objs
        for untouched_obj_id in untouched_obj_ids:
            new_session.delete(id_to_current_obj[untouched_obj_id])
            output_creator.removed()

        #Commit
        output_creator.finished()
        new_session.commit()

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info('complete')
def convert_genetic_interevidence(old_session_maker, new_session_maker, chunk_size):
    """Synchronise Geninteractionevidence rows with old Interaction rows.

    Old Interaction ids are walked in windows of chunk_size, from the
    smallest to the largest id present. For every window the existing
    Geninteractionevidence rows whose ids fall between
    create_genetic_evidence_id(window start) and
    create_genetic_evidence_id(window end) are loaded, the objects returned
    by create_genetic_interevidence are passed to create_or_update, existing
    rows that were matched neither by id nor by unique key are deleted, and
    the new session is committed.

    Any exception is logged and swallowed; both sessions are closed in all
    cases.

    old_session_maker -- callable returning a session bound to the old schema
    new_session_maker -- callable returning a session bound to the new schema
    chunk_size -- width, in old Interaction ids, of each processing window
    """
    from model_new_schema.interaction import Geninteractionevidence as NewGeninteractionevidence
    from model_new_schema.reference import Reference as NewReference
    from model_new_schema.evelement import Experiment as NewExperiment
    from model_new_schema.bioentity import Bioentity as NewBioentity
    from model_new_schema.phenotype import Phenotype as NewPhenotype
    from model_old_schema.interaction import Interaction as OldInteraction

    log = logging.getLogger("convert.genetic_interaction.evidence")
    log.info("begin")
    output_creator = OutputCreator(log)

    # Bound before the try so the finally clause still works if a session maker raises.
    new_session = None
    old_session = None
    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        # Values to check
        values_to_check = [
            "experiment_id",
            "reference_id",
            "strain_id",
            "source",
            "bioentity1_id",
            "bioentity2_id",
            "phenotype_id",
            "note",
            "annotation_type",
            "date_created",
            "created_by",
        ]

        # Grab cached dictionaries
        key_to_experiment = dict((x.unique_key(), x) for x in new_session.query(NewExperiment).all())
        key_to_phenotype = dict((x.unique_key(), x) for x in new_session.query(NewPhenotype).all())
        bioent_ids = dict((x.unique_key(), x) for x in new_session.query(NewBioentity).all())
        reference_ids = set(x.id for x in new_session.query(NewReference).all())

        min_id = old_session.query(func.min(OldInteraction.id)).first()[0]
        max_id = old_session.query(func.max(OldInteraction.id)).first()[0]
        # The id range is inclusive at both ends, hence the + 1. Without it the
        # highest-id interaction was skipped whenever (max_id - min_id) was a
        # multiple of chunk_size, and a table with a single row produced no
        # chunks at all. int() because ceil may return a float, which range()
        # does not accept.
        num_chunks = int(ceil(1.0 * (max_id - min_id + 1) / chunk_size))
        for i in range(0, num_chunks):
            upper_id = min_id + chunk_size

            # Grab all current objects
            current_objs = (
                new_session.query(NewGeninteractionevidence)
                .filter(NewGeninteractionevidence.id >= create_genetic_evidence_id(min_id))
                .filter(NewGeninteractionevidence.id < create_genetic_evidence_id(upper_id))
                .all()
            )
            id_to_current_obj = dict((x.id, x) for x in current_objs)
            key_to_current_obj = dict((x.unique_key(), x) for x in current_objs)

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = (
                old_session.query(OldInteraction)
                .filter(OldInteraction.id >= min_id)
                .filter(OldInteraction.id < upper_id)
                .options(
                    joinedload("interaction_references"),
                    joinedload("interaction_phenotypes"),
                    joinedload("feature_interactions"),
                )
            )

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_genetic_interevidence(
                    old_obj, key_to_experiment, key_to_phenotype, reference_ids, bioent_ids
                )

                if newly_created_objs is None:
                    continue

                # Edit or add new objects
                for newly_created_obj in newly_created_objs:
                    current_obj_by_id = id_to_current_obj.get(newly_created_obj.id)
                    current_obj_by_key = key_to_current_obj.get(newly_created_obj.unique_key())
                    create_or_update(
                        newly_created_obj,
                        current_obj_by_id,
                        current_obj_by_key,
                        values_to_check,
                        new_session,
                        output_creator,
                    )

                    # Anything matched by id or by key is still in use and must
                    # survive the delete pass below.
                    if current_obj_by_id is not None:
                        untouched_obj_ids.discard(current_obj_by_id.id)
                    if current_obj_by_key is not None:
                        untouched_obj_ids.discard(current_obj_by_key.id)

            # Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            # Commit
            output_creator.finished(str(i + 1) + "/" + str(num_chunks))
            new_session.commit()
            min_id = upper_id

    except Exception:
        log.exception("Unexpected error:" + str(sys.exc_info()[0]))
    finally:
        if new_session is not None:
            new_session.close()
        if old_session is not None:
            old_session.close()

    log.info("complete")