Ejemplo n.º 1
0
def groundtruth(curated, gt_type, verbose=True):
    """Load a stored groundtruth array, compiling it first if it is missing.

    Args:
        curated (bool): selects the '_curated' variant of the file when
            true, the unsuffixed variant otherwise.
        gt_type (str): groundtruth type; must be one of
            ``groundtruth_types``.
        verbose (bool): forwarded to ``_make_groundtruth`` when the file has
            to be (re)built.

    Returns:
        CausalArray: the array loaded from the groundtruth file.

    Raises:
        AssertionError: if ``gt_type`` is not a known groundtruth type.
    """
    # Validate before the value is used to build a path.
    assert gt_type in groundtruth_types, \
        'Unknown groundtruth type %r' % (gt_type,)
    filename = filename_groundtruth.format('_curated' if curated else '',
                                           gt_type)
    if not os.path.exists(filename):
        # The file is not there yet: compile it, honouring the caller's
        # verbosity setting instead of silently falling back to the default.
        _make_groundtruth(curated=curated, verbose=verbose)
    return CausalArray.load(file=filename)
 def gt(self, name, successful_ints=False, change_strint_name=False):
     """Return one of the groundtruth measures wrapped in a CausalArray.

     Args:
         name (str): type of groundtruth. Every measure accepts several
             spellings, e.g. 'gt_abs', 'gt.abs' or 'abs'.
         successful_ints (bool): if true, restrict the causes to
             ``self.successful_ints`` and select the matching entries
             along the first axis of the array.
         change_strint_name (bool): if true, and ``successful_ints`` is
             also true, append '_strint' to the name of the result.

     Returns:
         CausalArray: groundtruth

     Raises:
         Exception: if ``name`` is not a known groundtruth, or if
             ``successful_ints`` is requested but there are none.
     """
     row_labels = self.mutants
     col_labels = self.genes

     # One entry per measure:
     # (accepted spellings, attribute holding the data, units).
     measures = (
         (('gt_abs', 'gt.abs', 'abs'), 'gt_abs', 'j'),
         (('gt_rel', 'gt.rel', 'rel'), 'gt_rel', 'j/i'),
         (('gt_abs_norm', 'gt.abs.norm', 'abs_norm', 'abs.norm'),
          'gt_abs_norm', '1'),
         (('gt_abs_norm_robust', 'gt.abs.norm.robust',
           'abs_norm_robust', 'abs.norm.robust'),
          'gt_abs_norm_robust', '1'),
         (('gt_rel_norm', 'gt.rel.norm', 'rel_norm', 'rel.norm'),
          'gt_rel_norm', '1'),
         (('gt_nature', 'gt.nature', 'nature'), 'gt_nature', '1'),
         (('gt_sie', 'gt.sie', 'sie'), 'gt_sie', '0'),
     )
     for spellings, attribute, units in measures:
         if name in spellings:
             # Only the requested measure is read from the instance.
             array = getattr(self, attribute)
             break
     else:
         raise Exception('Unknown groundtruth %s!' % name)

     if successful_ints:
         # Keep only the successful interventions (the other causes are
         # dropped from the result rather than zeroed out).
         row_labels = self.successful_ints
         if len(row_labels) == 0:
             raise Exception('No successful interventions found!')
         selected = [self.map_mutants[mutant] for mutant in row_labels]
         array = array[selected, ]
         if change_strint_name:
             name += '_strint'

     return CausalArray(array=array,
                        causes=row_labels,
                        effects=col_labels,
                        units=units,
                        name=name)
Ejemplo n.º 3
0
def _make_groundtruth(curated, round_size=None, verbose=True):
    """Query SGD (through YeastMine) and compile the interaction groundtruth.

    Two boolean gene-by-gene adjacency matrices are filled, one for genetic
    and one for physical interactions. For every returned row in which the
    queried gene has role 'Bait', the entry
    ``[index(participant2), index(queried gene)]`` is set to True. The
    genetic matrix, the physical matrix and their element-wise sum are then
    each saved as a CausalArray under ``filename_groundtruth``.

    Args:
        curated (bool): if true, only 'manually curated' interactions are
            queried and the '_curated' file variants are written.
        round_size: unused; kept for interface compatibility.
        verbose (bool): print progress and summary information.

    Returns:
        None. The results are written to disk.
    """
    # NOTE: every print below is a single parenthesised expression, so the
    # output is the same whether or not ``print`` is a function here.
    start_time = time.time()
    if verbose:
        # The conditional must be parenthesised: '%' binds tighter than
        # 'if/else', which previously truncated or replaced the message.
        print('Querying yeastmine for %s interactions.' %
              ('manually curated' if curated else 'all'))

    # Start intermine service
    yeastmine = Service('http://yeastmine.yeastgenome.org/yeastmine/service')

    # 1. Get a list of all genes in a quick query. The identifiers come
    #    back as unicode and are converted to str.
    query = yeastmine.new_query('Gene')
    query.add_view('secondaryIdentifier')
    query.add_sort_order('Gene.secondaryIdentifier', 'ASC')
    query.add_constraint('Gene',
                         'IN',
                         'ALL_Verified_Uncharacterized_Dubious_ORFs',
                         code='A')

    # Gene list plus a gene -> matrix index map.
    genes_list = [str(row['secondaryIdentifier']) for row in query.rows()]
    genes_map = {gene: index for index, gene in enumerate(genes_list)}
    if verbose:
        # 100 is the starting value of the running minimum.
        min_length = min([100] + [len(gene) for gene in genes_list])
        print('Gene char length min: {}'.format(min_length))
        # should be 6604 for ALL_Verified_Uncharacterized_Dubious_ORFs
        print('Number of genes: {}'.format(len(genes_list)))

    # 2. Query for interactions.
    query = yeastmine.new_query('Gene')
    query.add_view('secondaryIdentifier', 'interactions.details.type',
                   'interactions.participant2.secondaryIdentifier',
                   'interactions.details.role1')
    query.add_sort_order('Gene.secondaryIdentifier', 'ASC')
    query.add_constraint('organism.shortName', '=', 'S. cerevisiae', code='B')
    query.add_constraint('Gene',
                         'IN',
                         'ALL_Verified_Uncharacterized_Dubious_ORFs',
                         code='A')
    if curated:
        query.add_constraint('interactions.details.annotationType',
                             '=',
                             'manually curated',
                             code='C')

    # Matrices
    n_genes = len(genes_list)
    adjacency_matrix_genetic = np.zeros((n_genes, n_genes), dtype='bool')
    adjacency_matrix_physical = np.zeros((n_genes, n_genes), dtype='bool')
    matrix_by_type = {
        u'physical interactions': adjacency_matrix_physical,
        u'genetic interactions': adjacency_matrix_genetic,
    }

    # Add each interaction to the matching adjacency matrix.
    current_gene = None
    for row in query.rows():
        gene = str(row['secondaryIdentifier'])
        if gene != current_gene:
            current_gene = gene
            if verbose:
                print('Querying {} - {}'.format(genes_map[current_gene],
                                                current_gene))

        # Only Bait <--- Hit relationships are recorded.
        if row['interactions.details.role1'] != u'Bait':
            continue
        matrix = matrix_by_type.get(row['interactions.details.type'])
        if matrix is None:
            continue
        # Dict membership is O(1); scanning genes_list was O(n) per row.
        partner = str(row['interactions.participant2.secondaryIdentifier'])
        if partner not in genes_map:
            continue
        matrix[genes_map[partner], genes_map[gene]] = True

    if verbose:
        print('Nuber of genetic interactions {}'.format(
            adjacency_matrix_genetic.sum()))
        print('Nuber of physical interactions {}'.format(
            adjacency_matrix_physical.sum()))

    # Save all data. (Writing a datestamp into each file via
    # write_datestamp_hdf5 is not done here.)
    file_suffix = '_curated' if curated else ''
    name_suffix = '_cur' if curated else ''
    outputs = (
        ('genetic', 'sgd_gen', adjacency_matrix_genetic),
        ('physical', 'sgd_phys', adjacency_matrix_physical),
        ('all', 'sgd_all', np.add(adjacency_matrix_genetic,
                                  adjacency_matrix_physical)),
    )
    for gt_type, base_name, matrix in outputs:
        CausalArray(array=matrix,
                    causes=genes_list,
                    name=base_name + name_suffix).save(
                        file=filename_groundtruth.format(file_suffix,
                                                         gt_type))
    if verbose:
        print('Done in {:.2f}'.format(time.time() - start_time))
Ejemplo n.º 4
0
def gt_inclusive_genetic():
    """Load the stored non-curated genetic groundtruth.

    The file is loaded directly; unlike ``groundtruth`` this does not check
    whether the file exists or trigger its compilation.

    Returns:
        The result of ``CausalArray.load`` for that file.
    """
    path = filename_groundtruth.format('', 'genetic')
    return CausalArray.load(file=path)
Ejemplo n.º 5
0
def gt_inclusive_physical():
    """Load the stored non-curated physical groundtruth.

    The file is loaded directly; unlike ``groundtruth`` this does not check
    whether the file exists or trigger its compilation.

    Returns:
        The result of ``CausalArray.load`` for that file.
    """
    path = filename_groundtruth.format('', 'physical')
    return CausalArray.load(file=path)
Ejemplo n.º 6
0
def gt_curated_genetic():
    """Load the stored curated genetic groundtruth.

    The file is loaded directly; unlike ``groundtruth`` this does not check
    whether the file exists or trigger its compilation.

    Returns:
        The result of ``CausalArray.load`` for that file.
    """
    path = filename_groundtruth.format('_curated', 'genetic')
    return CausalArray.load(file=path)
Ejemplo n.º 7
0
def gt_curated_physical():
    """Load the stored curated physical groundtruth.

    The file is loaded directly; unlike ``groundtruth`` this does not check
    whether the file exists or trigger its compilation.

    Returns:
        The result of ``CausalArray.load`` for that file.
    """
    path = filename_groundtruth.format('_curated', 'physical')
    return CausalArray.load(file=path)