def analyze(matrixtype, depth, processors):
    """
    Runs the analysis selected by ``matrixtype`` on the stored source set.

    The source and background bulbs ids are fetched with
    ``get_source_bulbs_ids`` and ``get_background_bulbs_ids``, and an
    interactome interface is fast-loaded before the analysis is dispatched.

    :param matrixtype: 'interactome' or 'annotome'; any other value runs no
        analysis and only prints the completion message
    :param depth: forwarded to the analysis routine (as ``desired_depth`` for
        the annotome analysis)
    :param processors: forwarded to the analysis routine
    :return: None
    """
    hit_ids = get_source_bulbs_ids()
    background_ids = get_background_bulbs_ids()

    # TODO: CRITICAL: inject background usage when background switch is available.
    #   Refer to the analysis pipeline example for an example

    # This instance is not referenced again in this function; it is built and
    # fast-loaded before either analysis runs.
    interactome_interface_instance = InteractomeInterface(main_connex_only=True,
                                                          full_impact=False)
    interactome_interface_instance.fast_load()

    if matrixtype == 'annotome':
        knowledge_analysis(
            source=hit_ids,
            desired_depth=depth,
            processors=processors,
            param_set=[['biological_process'], background_ids, (1, 1), True, 3])
    elif matrixtype == 'interactome':
        # NOTE(review): other call sites of this routine wrap the source ids
        # in a list and use keyword arguments - confirm that this positional
        # call still matches the routine's current signature.
        interactome_analysis(hit_ids, depth, processors, background_ids)

    # Parenthesised single string: prints the same text under Python 2.
    print("analsysis is finished, current results are stored "
          "in the outputs directory")
def rebuildlaplacians():
    """
    Extracts the Laplacian matrices from the master graph database.

    \f
    The stored background bulbs ids are read and handed to
    ``rebuild_the_laplacians`` as ``all_detectable_genes``. The bioflow
    helpers are imported when the function is called, not at module import.

    :return: None
    """
    from bioflow.utils.top_level import rebuild_the_laplacians
    from bioflow.utils.io_routines import get_background_bulbs_ids

    rebuild_the_laplacians(all_detectable_genes=get_background_bulbs_ids())
def map_and_save_gene_ids(hit_genes_location, all_detectable_genes_location=''):
    """
    Casts the hit genes (and optionally the background genes) to bulbs ids.

    The hit set at ``hit_genes_location`` is always cast and read back. When a
    background location is given, the background set is cast and read back as
    well; otherwise the background dump file is overwritten with a single
    empty row and an empty list is returned for the background.

    :param hit_genes_location: location of the analysis (hit) set, passed to
        ``cast_analysis_set_to_bulbs_ids``
    :param all_detectable_genes_location: location of the background set; any
        falsy value (the default is an empty string) means "no background"
    :return: tuple ``(hit_genes_ids, all_detectable_genes_ids)``
    """
    cast_analysis_set_to_bulbs_ids(hit_genes_location)
    hit_genes_ids = get_source_bulbs_ids()

    if all_detectable_genes_location:
        cast_background_set_to_bulbs_id(
            background_set_csv_location=all_detectable_genes_location,
            analysis_set_csv_location=hit_genes_location)
        all_detectable_genes_ids = get_background_bulbs_ids()
    else:
        all_detectable_genes_ids = []
        # No background supplied: reset the stored background dump. The file
        # is opened in a ``with`` block so the handle is flushed and closed
        # deterministically instead of being left to the garbage collector.
        with open(Dumps.background_set_bulbs_ids, 'w') as dump_file:
            writer(dump_file, delimiter='\n').writerow(all_detectable_genes_ids)

    return hit_genes_ids, all_detectable_genes_ids
def extractmatrix(matrixtype):
    """
    Rebuilds the matrix interface object(s) used by the computation routine.

    For 'interactome' a fresh ``InteractomeInterface`` is fully rebuilt. For
    'annotome' an ``InteractomeInterface`` is fast-loaded first and an
    ``AnnotomeInterface`` is then fully rebuilt against the stored background
    bulbs ids. Any other value does nothing.

    :param matrixtype: 'interactome' or 'annotome'
    :return: None - the interface objects are not handed back to the caller
    """
    if matrixtype == 'interactome':
        interactome = InteractomeInterface(main_connex_only=True, full_impact=False)
        interactome.full_rebuild()
        return

    if matrixtype == 'annotome':
        interactome = InteractomeInterface(main_connex_only=True, full_impact=False)
        interactome.fast_load()

        # The background ids are fetched only after the interactome is loaded.
        background_ids = get_background_bulbs_ids()
        annotome = AnnotomeInterface(['biological_process'], background_ids,
                                     (1, 1), True, 3)
        annotome.full_rebuild()
def interactomeanalysis(depth, processors, skipsampling, skiphitflow):
    """
    Performs interactome analysis given background set given earlier.

    \f
    The stored source ids are wrapped in a one-element list and passed,
    together with the stored background ids, to the interactome
    ``auto_analyze`` routine. The bioflow helpers are imported when the
    function is called, not at module import.

    :param depth: forwarded as ``desired_depth``
    :param processors: forwarded as ``processors``
    :param skipsampling: forwarded as ``skip_sampling``
    :param skiphitflow: forwarded as ``from_memoization``
    :return: None
    """
    from bioflow.utils.io_routines import get_background_bulbs_ids, get_source_bulbs_ids
    from bioflow.molecular_network.interactome_analysis import auto_analyze as interactome_analysis

    hit_ids = get_source_bulbs_ids()
    background_ids = get_background_bulbs_ids()

    analysis_options = dict(
        desired_depth=depth,
        processors=processors,
        background_list=background_ids,
        skip_sampling=skipsampling,
        from_memoization=skiphitflow)

    interactome_analysis([hit_ids], **analysis_options)
those that share a significant amount of reached_uniprots_bulbs_id_list in common """ self.Indep_Lapl = lil_matrix((len(self.All_GOs), len(self.All_GOs))) for GO_list in self.UP2GO_Reachable_nodes.itervalues(): for GO1, GO2 in combinations(GO_list, 2): idx1, idx2 = (self.GO2Num[GO1], self.GO2Num[GO2]) self.Indep_Lapl[idx1, idx2] += -1 self.Indep_Lapl[idx2, idx1] += -1 self.Indep_Lapl[idx2, idx2] += 1 self.Indep_Lapl[idx1, idx1] += 1 if __name__ == '__main__': # Creates an instance of MatrixGetter and loads pre-computed values go_interface_instance = GeneOntologyInterface(uniprot_node_ids=get_background_bulbs_ids()) go_interface_instance.full_rebuild() # loading takes 1-6 seconds. # fill for reach only is done in 2 seconds, # tepping takes another 15, # inverting + info computation - 1 more second # Laplacian building => ## # full computation - 3 minutes 18 seconds; save 7 seconds, retrieval - 3 # seconds # go_interface_instance.load() # print go_interface_instance.pretty_time() # go_interface_instance.get_indep_linear_groups()
those that share a significant amount of reached_uniprots_neo4j_id_list in common """ self.Indep_Lapl = lil_matrix((len(self.All_GOs), len(self.All_GOs))) for GO_list in self.UP2GO_Reachable_nodes.itervalues(): for GO1, GO2 in combinations(GO_list, 2): idx1, idx2 = (self.GO2Num[GO1], self.GO2Num[GO2]) self.Indep_Lapl[idx1, idx2] += -1 self.Indep_Lapl[idx2, idx1] += -1 self.Indep_Lapl[idx2, idx2] += 1 self.Indep_Lapl[idx1, idx1] += 1 if __name__ == '__main__': # Creates an instance of MatrixGetter and loads pre-computed values go_interface_instance = GeneOntologyInterface(uniprot_node_ids=get_background_bulbs_ids()) go_interface_instance.full_rebuild() # loading takes 1-6 seconds. # fill for reach only is done in 2 seconds, # tepping takes another 15, # inverting + info computation - 1 more second # Laplacian building => ## # full computation - 3 minutes 18 seconds; save 7 seconds, retrieval - 3 # seconds # go_interface_instance.load() # print go_interface_instance.pretty_time() # go_interface_instance.get_indep_linear_groups()
for group in nr_groups: log.info(group) log.info('\t Node_ID \t Name \t current \t connectedness \t p_value') for node in nr_nodes: log.info('\t %s \t %s \t %s \t %s \t %s', *node) if __name__ == "__main__": # pprinter = PrettyPrinter(indent=4) # interactome_interface_instance = MatrixGetter(True, False) # interactome_interface_instance.fast_load() # dumplist = undump_object(Dumps.RNA_seq_counts_compare) # MG1.randomly_sample([150], [1], chromosome_specific=15, No_add=True) # nr_nodes, nr_groups = compare_to_blanc(150, [0.5, 0.6], MG1, p_val=0.9) # MG1.export_conduction_system() # for group in nr_groups: # print group # for node in nr_nodes: # print node source = get_source_bulbs_ids() background_list = get_background_bulbs_ids() auto_analyze([source], desired_depth=5, processors=6, background_list=background_list, skip_sampling=True)
# Step 1 (disabled): build the neo4j database
# build_db()

# Step 2 (disabled): set the source file of the ids of perturbed proteins
# and of the background set. Candidate input files:
#   "/home/andrei/2nd_pass_2x.txt"
#   "/home/andrei/Linhao_imaging.txt"
#   "/home/andrei/HS_30_Linhao_outliers.txt"
# cast_analysis_set_to_bulbs_ids("/home/andrei/Linhao_imaging.txt")
#
# cast_background_set_to_bulbs_id(
#     background_set_csv_location=None,
#     analysis_set_csv_location="/home/andrei/HS_30_Linhao_outliers.txt")

# Step 3: get the bulbs ids of the nodes we would like to analyze
source_bulbs_ids = get_source_bulbs_ids()
background_bulbs_ids = get_background_bulbs_ids()

# Report how many ids were loaded for each set.
print(len(source_bulbs_ids))
print(len(background_bulbs_ids))

# Step 4 (disabled): build the interactome interface object
# local_matrix = InteractomeInterface(main_connex_only=True, full_impact=False)
# local_matrix.full_rebuild()

# Step 5 (disabled): perform the interactome analysis
# interactome_analysis([source_bulbs_ids], desired_depth=24, processors=3,
#                      background_list=background_bulbs_ids, skip_sampling=False)

# Step 6: build the reference parameters set
_filter = ['biological_process']
ref_param_set = [_filter, background_bulbs_ids, (1, 1), True, 3]
""" self.Indep_Lapl = lil_matrix((len(self.All_GOs), len(self.All_GOs))) for GO_list in self.UP2GO_Reachable_nodes.itervalues(): for GO1, GO2 in combinations(GO_list, 2): idx1, idx2 = (self.GO2Num[GO1], self.GO2Num[GO2]) self.Indep_Lapl[idx1, idx2] += -1 self.Indep_Lapl[idx2, idx1] += -1 self.Indep_Lapl[idx2, idx2] += 1 self.Indep_Lapl[idx1, idx1] += 1 if __name__ == '__main__': # Creates an instance of MatrixGetter and loads pre-computed values go_interface_instance = GeneOntologyInterface( uniprot_node_ids=get_background_bulbs_ids()) go_interface_instance.full_rebuild() # loading takes 1-6 seconds. # fill for reach only is done in 2 seconds, # tepping takes another 15, # inverting + info computation - 1 more second # Laplacian building => ## # full computation - 3 minutes 18 seconds; save 7 seconds, retrieval - 3 # seconds # go_interface_instance.load() # print go_interface_instance.pretty_time() # go_interface_instance.get_indep_linear_groups()