예제 #1
0
 def undump_eigen(self):
     """
     Restores the eigenvalue / eigenvector attributes from their dumps.

     Sets self.adj_eigenvals and self.adj_eigenvects from Dumps.val_eigen, then
     self.cond_eigenvals and self.cond_eigenvects from Dumps.cond_eigen. Each
     dump has to unpack into exactly two items.
     """
     adj_pair = undump_object(Dumps.val_eigen)
     self.adj_eigenvals, self.adj_eigenvects = adj_pair

     cond_pair = undump_object(Dumps.cond_eigen)
     self.cond_eigenvals, self.cond_eigenvects = cond_pair
예제 #2
0
 def undump_matrices(self):
     """
     Restores self.adjacency_Matrix and self.laplacian_matrix from their dumps.

     The adjacency matrix is read from Dumps.interactome_adjacency_matrix and the
     laplacian matrix from Dumps.interactome_laplacian_matrix, in that order.
     """
     restored_adjacency = undump_object(Dumps.interactome_adjacency_matrix)
     self.adjacency_Matrix = restored_adjacency

     restored_laplacian = undump_object(Dumps.interactome_laplacian_matrix)
     self.laplacian_matrix = restored_laplacian
예제 #3
0
 def setUpClass(cls):
     """
     Parses the UniProt test input and loads the stored reference parse.

     The fresh parse is stored in cls.uniprot_dict and cls.acces_dict; the
     reference pair undumped from cls.ref_parses is stored in
     cls.ref_uniprot_dict and cls.ref_acces_dict.
     """
     uniprot_parser = UniProtParser(['199310', '405955'])
     cls.uniprot_dict = uniprot_parser.parse_uniprot(cls.up_to_parse)
     cls.acces_dict = uniprot_parser.get_access_dicts()

     # NOTE(review): presumably uncommented to regenerate the reference dump - confirm
     # dump_object(cls.ref_parses, (cls.uniprot_dict, cls.acces_dict))
     reference_pair = undump_object(cls.ref_parses)
     cls.ref_uniprot_dict, cls.ref_acces_dict = reference_pair
예제 #4
0
 def setUpClass(cls):
     """
     Runs the Reactome parser on the test input and loads the reference parser.

     cls.actual_parser holds the ReactomeParser built from cls.reactome_to_parse
     after parse_all() has run; cls.ref_parser holds the object undumped from
     cls.ref_parse.
     """
     fresh_parser = ReactomeParser(cls.reactome_to_parse)
     cls.actual_parser = fresh_parser
     fresh_parser.parse_all()

     # NOTE(review): presumably uncommented to regenerate the reference dump - confirm
     # dump_object(cls.ref_parse, cls.actual_parser)
     reference_parser = undump_object(cls.ref_parse)
     cls.ref_parser = reference_parser

     # assumes cls is a unittest.TestCase subclass, where maxDiff = None lifts
     # the length cap on failure diffs
     cls.maxDiff = None
예제 #5
0
 def setUpClass(cls):
     """
     Builds the parser under test and loads the reference parser to compare with.

     The ReactomeParser created from cls.reactome_to_parse is stored in
     cls.actual_parser and parsed with parse_all(); the reference is undumped
     from cls.ref_parse into cls.ref_parser.
     """
     parser_under_test = ReactomeParser(cls.reactome_to_parse)
     cls.actual_parser = parser_under_test
     parser_under_test.parse_all()

     # NOTE(review): presumably uncommented to regenerate the reference dump - confirm
     # dump_object(cls.ref_parse, cls.actual_parser)
     stored_reference = undump_object(cls.ref_parse)
     cls.ref_parser = stored_reference

     # assumes cls is a unittest.TestCase subclass, where maxDiff = None lifts
     # the length cap on failure diffs
     cls.maxDiff = None
예제 #6
0
 def undump_maps(self):
     """
     Restores the mappings between the types and ids of database entries and
     the matrix columns.

     A single 13-item sequence is undumped from Dumps.interactome_maps and
     spread, in order, over the attributes listed below. The dump location is
     logged before loading and the length of
     self.entry_point_uniprots_bulbs_ids afterwards.
     """
     log.debug("undumping from %s", Dumps.interactome_maps)

     (
         self.bulbs_id_2_matrix_index,
         self.matrix_index_2_bulbs_id,
         self.bulbs_id_2_display_name,
         self.bulbs_id_2_legacy_id,
         self.bulbs_id2_node_type,
         self.bulbs_id_2_localization,
         self.reached_uniprots_bulbs_id_list,
         self.all_uniprots_bulbs_id_list,
         self.Uniprot_attachments,
         self.UP2Chrom,
         self.chromosomes_2_uniprot,
         self.uniprot_matrix_index_list,
         self.entry_point_uniprots_bulbs_ids,
     ) = undump_object(Dumps.interactome_maps)

     entry_points_count = len(self.entry_point_uniprots_bulbs_ids)
     log.debug("post-undump e_p_u_b_i length: %s", entry_points_count)
예제 #7
0
def auto_analyze(source=None, go_interface_instance=None, processors=3, desired_depth=24,
                 skip_sampling=False, param_set=ref_param_set):
    """
    Automatically analyzes the GO annotation of the RNA_seq results.

    Each entry of the source is set as the uniprot source of the GO interface. Unless
    sampling is skipped, a sampler pool is spawned. The extended conduction system is
    then built, compared to the blank and exported, and the resulting groups and nodes
    are logged.

    :param source: iterable whose entries are passed one by one to set_uniprot_source;
        when None it is undumped from Dumps.RNA_seq_counts_compare
    :param go_interface_instance: GO interface to work on; when None, one is created by
        get_go_interface_instance(param_set) on the first entry and reused afterwards
    :param processors: passed to spawn_sampler_pool as its first argument; the desired
        depth is split between that many parts
    :param desired_depth: total sampling depth; divided by processors and rounded up
        before being handed to the sampler pool
    :param skip_sampling: uses existing mongoDB content without spawning a sampler
    :param param_set: forwarded to get_go_interface_instance, spawn_sampler_pool and
        compare_to_blank
    """
    if source is None:
        dumplist = undump_object(Dumps.RNA_seq_counts_compare)
    else:
        dumplist = source

    # Ceiling division done on integers: a plain `/` would turn the per-process depth
    # into a float on Python 3.
    depth_share, depth_remainder = divmod(desired_depth, processors)
    desired_depth = depth_share + 1 if depth_remainder else depth_share

    # noinspection PyTypeChecker
    for my_list in dumplist:
        if go_interface_instance is None:
            go_interface_instance = get_go_interface_instance(param_set)

        go_interface_instance.set_uniprot_source(my_list)
        # Nothing below touches the interface until the conduction system is built, so
        # the size can be read once for the logging / sampling part.
        uniprots_count = len(go_interface_instance.analytic_uniprots)

        if not skip_sampling:
            log.info("spawning a sampler for %s proteins @ %s compops/sec",
                     uniprots_count, estimated_comp_ops)

        # TODO: restructure to spawn a sampler pool that does not share an object in the Threading
        if uniprots_count < 200:

            if not skip_sampling:

                log.info('length: %s \t sampling depth: %s \t, estimated round time: %s min',
                         uniprots_count,
                         'full',
                         uniprots_count**2 / estimated_comp_ops / 60)

                # The pool is not given the interface instance used here (see TODO above).
                spawn_sampler_pool(processors,
                                   [uniprots_count],
                                   [desired_depth],
                                   go_interface_instance=None,
                                   param_set=param_set)

            go_interface_instance.build_extended_conduction_system()
            nr_nodes, nr_groups = compare_to_blank(
                len(go_interface_instance.analytic_uniprots),
                [1100, 1300],
                p_val=0.9,
                go_interface_instance=go_interface_instance,
                param_set=param_set)

        else:
            # Floor division keeps the number of sparse rounds an integer on Python 3;
            # uniprots_count is at least 200 here, so the divisor is never zero.
            sampling_depth = max(200 ** 2 // uniprots_count, 5)

            if not skip_sampling:

                log.info('length: %s \t sampling depth: %s \t, estimated round time: %s min',
                         uniprots_count,
                         sampling_depth,
                         uniprots_count * sampling_depth / 2 / 60)

                # The pool is not given the interface instance used here (see TODO above).
                spawn_sampler_pool(processors,
                                   [uniprots_count],
                                   [desired_depth],
                                   sparse_rounds=sampling_depth,
                                   go_interface_instance=None,
                                   param_set=param_set)

            go_interface_instance.build_extended_conduction_system(sparse_samples=sampling_depth)
            nr_nodes, nr_groups = compare_to_blank(
                len(go_interface_instance.analytic_uniprots),
                [1100, 1300],
                p_val=0.9, sparse_rounds=sampling_depth,
                go_interface_instance=go_interface_instance,
                param_set=param_set)

        go_interface_instance.export_conduction_system()

        for group in nr_groups:
            log.info(group)
        log.info('\t NodeID \t Name \t current \t informativity \t confusion_potential \t p_val \t '
                 'UP_list')
        for node in nr_nodes:
            log.info('\t %s \t %s \t %s \t %s \t %s \t %s \t %s', *node)
예제 #8
0
 def undump_independent_linear_sets(self):
     """
     Restores self.Indep_Lapl from the dump at Dumps.GO_Indep_Linset.
     """
     restored = undump_object(Dumps.GO_Indep_Linset)
     self.Indep_Lapl = restored
예제 #9
0
 def undump_memoized():
     """
     Loads the memoized analysis stored at Dumps.GO_Analysis_memoized.

     :return: whatever undump_object yields for that dump
     """
     memoized_analysis = undump_object(Dumps.GO_Analysis_memoized)
     return memoized_analysis
예제 #10
0
 def setUpClass(cls):
     """
     Prepares the freshly parsed UniProt data and the reference data to compare it to.

     cls.uniprot_dict and cls.acces_dict come from a UniProtParser run on
     cls.up_to_parse; cls.ref_uniprot_dict and cls.ref_acces_dict are the two
     items undumped from cls.ref_parses.
     """
     up_parser = UniProtParser(['199310', '405955'])
     cls.uniprot_dict = up_parser.parse_uniprot(cls.up_to_parse)
     cls.acces_dict = up_parser.get_access_dicts()

     # NOTE(review): presumably uncommented to regenerate the reference dump - confirm
     # dump_object(cls.ref_parses, (cls.uniprot_dict, cls.acces_dict))
     stored_pair = undump_object(cls.ref_parses)
     cls.ref_uniprot_dict, cls.ref_acces_dict = stored_pair
예제 #11
0
 def undump_independent_linear_sets(self):
     """
     Loads the object dumped at Dumps.GO_Indep_Linset into self.Indep_Lapl.
     """
     loaded = undump_object(Dumps.GO_Indep_Linset)
     self.Indep_Lapl = loaded
예제 #12
0
 def undump_inflated_elements(self):
     """
     Restores the four "inflated" attributes from the dump at Dumps.GO_Inflated.

     The dump has to unpack into exactly four items, assigned in order to
     self.inflated_Laplacian, self.inflated_idx2lbl, self.inflated_lbl2idx and
     self.binding_intensity.
     """
     (
         self.inflated_Laplacian,
         self.inflated_idx2lbl,
         self.inflated_lbl2idx,
         self.binding_intensity,
     ) = undump_object(Dumps.GO_Inflated)
예제 #13
0
 def undump_matrices(self):
     """
     Restores the three matrix attributes from the dump at Dumps.GO_Mats.

     The dump has to unpack into exactly three items, assigned in order to
     self.adjacency_matrix, self.dir_adj_matrix and self.laplacian_matrix.
     """
     (
         self.adjacency_matrix,
         self.dir_adj_matrix,
         self.laplacian_matrix,
     ) = undump_object(Dumps.GO_Mats)
예제 #14
0
 def undump_statics():
     """
     Loads the object stored at Dumps.GO_builder_stat.

     :return: whatever undump_object yields for that dump
     """
     statics = undump_object(Dumps.GO_builder_stat)
     return statics
예제 #15
0
 def undump_memoized():
     """
     Loads the memoized analysis stored at Dumps.Interactome_Analysis_memoized.

     :return: whatever undump_object yields for that dump
     """
     memoized_analysis = undump_object(Dumps.Interactome_Analysis_memoized)
     return memoized_analysis
예제 #16
0
 def undump_statics():
     """
     Returns the object undumped from Dumps.GO_builder_stat.

     :return: result of undump_object for that dump, passed through unchanged
     """
     builder_statics = undump_object(Dumps.GO_builder_stat)
     return builder_statics
예제 #17
0
 def undump_core(self):
     """
     Restores the core attributes from the dump at Dumps.GO_dump.

     The dump has to unpack into exactly twelve items, which are assigned in
     order to the attributes listed below.
     """
     (
         self.UP2GO_Dict,
         self.GO2UP,
         self.SeedSet,
         self.Reachable_nodes_dict,
         self.GO_Names,
         self.GO_Legacy_IDs,
         self.rev_GO_IDs,
         self.All_GOs,
         self.GO2Num,
         self.Num2GO,
         self.UP_Names,
         self.UPs_without_GO,
     ) = undump_object(Dumps.GO_dump)
예제 #18
0
 def undump_core(self):
     """
     Loads the twelve core attributes stored together at Dumps.GO_dump.

     The undumped sequence is unpacked positionally, so it has to contain
     exactly twelve items in the order of the targets below.
     """
     (
         self.UP2GO_Dict,
         self.GO2UP,
         self.SeedSet,
         self.Reachable_nodes_dict,
         self.GO_Names,
         self.GO_Legacy_IDs,
         self.rev_GO_IDs,
         self.All_GOs,
         self.GO2Num,
         self.Num2GO,
         self.UP_Names,
         self.UPs_without_GO,
     ) = undump_object(Dumps.GO_dump)
예제 #19
0
 def undump_informativities(self):
     """
     Restores the reachability and informativity attributes from Dumps.GO_Infos.

     The dump has to unpack into exactly six items, which are assigned in order
     to the attributes listed below.
     """
     (
         self.UP2GO_Reachable_nodes,
         self.GO2UP_Reachable_nodes,
         self.UP2GO_step_Reachable_nodes,
         self.GO2UP_step_Reachable_nodes,
         self.GO2_Pure_Inf,
         self.GO2_Weighted_Ent,
     ) = undump_object(Dumps.GO_Infos)
예제 #20
0
 def undump_matrices(self):
     """
     Loads the three matrices stored together at Dumps.GO_Mats.

     The undumped sequence is unpacked positionally into
     self.adjacency_matrix, self.dir_adj_matrix and self.laplacian_matrix, so
     it has to contain exactly three items.
     """
     (
         self.adjacency_matrix,
         self.dir_adj_matrix,
         self.laplacian_matrix,
     ) = undump_object(Dumps.GO_Mats)
예제 #21
0
 def undump_memoized():
     """
     Returns the memoized analysis undumped from Dumps.GO_Analysis_memoized.

     :return: result of undump_object for that dump, passed through unchanged
     """
     stored_analysis = undump_object(Dumps.GO_Analysis_memoized)
     return stored_analysis
예제 #22
0
 def undump_informativities(self):
     """
     Loads the six attributes stored together at Dumps.GO_Infos.

     The undumped sequence is unpacked positionally, so it has to contain
     exactly six items in the order of the targets below.
     """
     (
         self.UP2GO_Reachable_nodes,
         self.GO2UP_Reachable_nodes,
         self.UP2GO_step_Reachable_nodes,
         self.GO2UP_step_Reachable_nodes,
         self.GO2_Pure_Inf,
         self.GO2_Weighted_Ent,
     ) = undump_object(Dumps.GO_Infos)
예제 #23
0
def auto_analyze(source=None,
                 go_interface_instance=None,
                 processors=3,
                 desired_depth=24,
                 skip_sampling=False,
                 param_set=ref_param_set):
    """
    Automatically analyzes the GO annotation of the RNA_seq results.

    Each entry of the source is set as the uniprot source of the GO interface. Unless
    sampling is skipped, a sampler pool is spawned. The extended conduction system is
    then built, compared to the blank and exported, and the resulting groups and nodes
    are logged.

    :param source: iterable whose entries are passed one by one to set_uniprot_source;
        when None it is undumped from Dumps.RNA_seq_counts_compare
    :param go_interface_instance: GO interface to work on; when None, one is created by
        get_go_interface_instance(param_set) on the first entry and reused afterwards
    :param processors: passed to spawn_sampler_pool as its first argument; the desired
        depth is split between that many parts
    :param desired_depth: total sampling depth; divided by processors and rounded up
        before being handed to the sampler pool
    :param skip_sampling: uses existing mongoDB content without spawning a sampler
    :param param_set: forwarded to get_go_interface_instance and compare_to_blank
    """
    if source is None:
        dumplist = undump_object(Dumps.RNA_seq_counts_compare)
    else:
        dumplist = source

    # Ceiling division done on integers: a plain `/` would turn the per-process depth
    # into a float on Python 3.
    depth_share, depth_remainder = divmod(desired_depth, processors)
    desired_depth = depth_share + 1 if depth_remainder else depth_share

    # noinspection PyTypeChecker
    for my_list in dumplist:
        if go_interface_instance is None:
            go_interface_instance = get_go_interface_instance(param_set)

        go_interface_instance.set_uniprot_source(my_list)
        # Nothing below touches the interface until the conduction system is built, so
        # the size can be read once for the logging / sampling part.
        uniprots_count = len(go_interface_instance.analytic_uniprots)

        if not skip_sampling:
            log.info("spawning a sampler for %s proteins @ %s compops/sec",
                     uniprots_count,
                     estimated_comp_ops)

        # TODO: restructure to spawn a sampler pool that does not share an object in the Threading
        if uniprots_count < 200:

            if not skip_sampling:

                log.info(
                    'length: %s \t sampling depth: %s \t, estimated round time: %s min',
                    uniprots_count, 'full',
                    uniprots_count**2 / estimated_comp_ops / 60)

                # The pool is not given the interface instance used here (see TODO above).
                spawn_sampler_pool(
                    processors, [uniprots_count],
                    [desired_depth],
                    go_interface_instance=None)

            go_interface_instance.build_extended_conduction_system()
            nr_nodes, nr_groups = compare_to_blank(
                len(go_interface_instance.analytic_uniprots), [1100, 1300],
                p_val=0.9,
                go_interface_instance=go_interface_instance,
                param_set=param_set)

        else:
            # Floor division keeps the number of sparse rounds an integer on Python 3;
            # uniprots_count is at least 200 here, so the divisor is never zero.
            sampling_depth = max(200**2 // uniprots_count, 5)

            if not skip_sampling:

                log.info(
                    'length: %s \t sampling depth: %s \t, estimated round time: %s min',
                    uniprots_count,
                    sampling_depth,
                    uniprots_count * sampling_depth / 2 / 6 / 60)

                # The pool is not given the interface instance used here (see TODO above).
                spawn_sampler_pool(
                    processors, [uniprots_count],
                    [desired_depth],
                    sparse_rounds=sampling_depth,
                    go_interface_instance=None)

            go_interface_instance.build_extended_conduction_system(
                sparse_samples=sampling_depth)
            nr_nodes, nr_groups = compare_to_blank(
                len(go_interface_instance.analytic_uniprots), [1100, 1300],
                p_val=0.9,
                sparse_rounds=sampling_depth,
                go_interface_instance=go_interface_instance,
                param_set=param_set)

        go_interface_instance.export_conduction_system()

        for group in nr_groups:
            log.info(group)
        log.info(
            '\t NodeID \t Name \t current \t informativity \t confusion_potential \t p_val \t '
            'UP_list')
        for node in nr_nodes:
            log.info('\t %s \t %s \t %s \t %s \t %s \t %s \t %s', *node)
예제 #24
0
 def undump_inflated_elements(self):
     """
     Loads the four attributes stored together at Dumps.GO_Inflated.

     The undumped sequence is unpacked positionally into
     self.inflated_Laplacian, self.inflated_idx2lbl, self.inflated_lbl2idx and
     self.binding_intensity, so it has to contain exactly four items.
     """
     (
         self.inflated_Laplacian,
         self.inflated_idx2lbl,
         self.inflated_lbl2idx,
         self.binding_intensity,
     ) = undump_object(Dumps.GO_Inflated)