예제 #1
0
    def run_real(self):
        """Validates the command line and runs the application.

        ``self.args`` must hold exactly three items: the GO tree file, the
        architecture file and the GOA file, in this order. All the option
        checks are performed before any input file is loaded, so an invalid
        invocation is reported through ``self.error`` without reading the
        GO tree first.

        Depending on ``self.options.source_arch`` and ``self.options.both``,
        the work is delegated to ``_transfer_from_same_file``,
        ``_transfer_from_other_file`` or ``_transfer_from_both``.

        NOTE(review): the checks below rely on ``self.error`` not returning
        (e.g. by exiting or raising) -- confirm against its definition.
        """
        if len(self.args) != 3:
            # Keep the offending count visibly separated from the message
            self.error("exactly three input files are expected, got %d"
                       % len(self.args))

        if self.options.ev_codes not in self.evidence:
            self.error("The three valid types of evidence codes are: " +
                       "EXP, ALL_BUT_IEA, and ALL")

        rbp = self.options.results_by_protein
        self.options.results_by_protein = self.__parse_bool(rbp)
        if not self.options.results_by_protein \
           and not self.options.results_by_arch:
            self.error("Either results by protein or by arch should be set")

        if self.options.max_pvalue < 0.0 or self.options.max_pvalue > 1.0:
            self.error("The maximum p-value should be between 0.0 and 1.0")

        # Checked here, before the input files are read, so that a wrong
        # combination of options does not cost a full load of the GO tree
        source_arch = self.options.source_arch
        if not source_arch and self.options.both:
            self.error("A source architecture file "
                       "should be specified in 'both' mode")

        codes = set(self.evidence[self.options.ev_codes])

        go_file, arch_a, goa_b = self.args
        self.log.info("Loading GO tree from %s...", go_file)
        self.go_tree = GOTree.from_obo(go_file)
        goa = self.read_goa_file(goa_b, codes)

        if not source_arch:
            self._transfer_from_same_file(goa, arch_a)
        elif not self.options.both:
            self._transfer_from_other_file(goa, arch_a, source_arch)
        else:
            self._transfer_from_both(goa, arch_a, source_arch)
예제 #2
0
파일: overrep.py 프로젝트: Gustibimo/gfam
    def run_real(self):
        """Entry point of the overrepresentation analysis application.

        ``self.args`` is expected to hold exactly three file names: the one
        passed to ``GOTree.from_obo``, the one passed to
        ``InterPro2GOMapping.from_file`` and the input file handed over to
        ``self.process_file``, in this order. Any other number of arguments
        is reported through ``self.error``.

        Returns whatever ``self.process_file`` returns for the input file.
        """
        arg_count = len(self.args)
        if arg_count != 3:
            self.error("expected exactly three input file names")

        obo_path, mapping_path, arch_path = self.args

        # The tree is loaded first because the mapping loader receives it
        self.log.info("Loading GO tree from %s..." % obo_path)
        self.go_tree = GOTree.from_obo(obo_path)

        self.log.info(
            "Loading InterPro --> GO mapping from %s..." % mapping_path
        )
        self.go_mapping = InterPro2GOMapping.from_file(
            mapping_path, self.go_tree
        )

        self.log.info(
            "Processing domain architectures from %s..." % arch_path
        )
        outcome = self.process_file(arch_path)
        return outcome
예제 #3
0
    def run_real(self):
        """Entry point of the label assignment application.

        ``self.args`` is expected to hold exactly three file names: the one
        passed to ``GOTree.from_obo``, the one passed to
        ``InterPro2GOMapping.from_file`` and the input file handed over to
        ``self.process_file``, in this order. Any other number of arguments
        is reported through ``self.error``.

        Returns whatever ``self.process_file`` returns for the input file.
        """
        arg_count = len(self.args)
        if arg_count != 3:
            self.error("expected exactly three input file names")

        obo_path, mapping_path, arch_path = self.args
        log_info = self.log.info

        # The tree is loaded first because the mapping loader receives it
        log_info("Loading GO tree from %s...", obo_path)
        self.go_tree = GOTree.from_obo(obo_path)

        log_info("Loading InterPro --> GO mapping from %s...", mapping_path)
        self.go_mapping = InterPro2GOMapping.from_file(
            mapping_path, self.go_tree
        )

        log_info("Processing domain architectures from %s...", arch_path)
        outcome = self.process_file(arch_path)
        return outcome
예제 #4
0
    def run_real(self):
        """Entry point of the overrepresentation analysis application.

        ``self.args`` is expected to hold exactly three file names: the one
        passed to ``GOTree.from_obo``, the one passed to
        ``InterPro2GOMapping.from_file`` and the input file handed over to
        ``self.process_file``, in this order. The ``results_by_protein``
        option is replaced by its parsed boolean form, and at least one of
        ``results_by_protein`` and ``arch_file`` has to be set. Violations
        are reported through ``self.error``.

        Returns whatever ``self.process_file`` returns for the input file.
        """
        arg_count = len(self.args)
        if arg_count != 3:
            self.error("expected exactly three input file names")

        obo_path, mapping_path, arch_path = self.args

        opts = self.options
        opts.results_by_protein = self.__parse_bool(opts.results_by_protein)
        if not (opts.results_by_protein or opts.arch_file):
            self.error("Either results by protein or by arch should be set")

        # The tree is loaded first because the mapping loader receives it
        self.log.info("Loading GO tree from %s..." % obo_path)
        self.go_tree = GOTree.from_obo(obo_path)

        self.log.info(
            "Loading InterPro --> GO mapping from %s..." % mapping_path
        )
        self.go_mapping = InterPro2GOMapping.from_file(
            mapping_path, self.go_tree
        )

        self.log.info(
            "Processing domain architectures from %s..." % arch_path
        )
        outcome = self.process_file(arch_path)
        return outcome
예제 #5
0
    def run_real(self):
        """Entry point of the overrepresentation analysis application.

        ``self.args`` is expected to hold exactly three file names: the one
        passed to ``GOTree.from_obo``, the one passed to
        ``InterPro2GOMapping.from_file`` and the input file handed over to
        ``self.process_file``, in this order. The ``results_by_protein``
        option is replaced by its parsed boolean form, and at least one of
        ``results_by_protein`` and ``arch_file`` has to be set. Violations
        are reported through ``self.error``.

        Returns whatever ``self.process_file`` returns for the input file.
        """
        arg_count = len(self.args)
        if arg_count != 3:
            self.error("expected exactly three input file names")

        obo_path, mapping_path, arch_path = self.args

        opts = self.options
        opts.results_by_protein = self.__parse_bool(opts.results_by_protein)
        if not (opts.results_by_protein or opts.arch_file):
            self.error("Either results by protein or by arch should be set")

        log_info = self.log.info

        # The tree is loaded first because the mapping loader receives it
        log_info("Loading GO tree from %s...", obo_path)
        self.go_tree = GOTree.from_obo(obo_path)

        log_info("Loading InterPro --> GO mapping from %s...", mapping_path)
        self.go_mapping = InterPro2GOMapping.from_file(
            mapping_path, self.go_tree
        )

        log_info("Processing domain architectures from %s...", arch_path)
        outcome = self.process_file(arch_path)
        return outcome
예제 #6
0
    def run_real(self):
        """Validates the command line and runs the application.

        ``self.args`` must hold exactly three items: the GO tree file, the
        architecture file and the GOA file, in this order. All the option
        checks are performed before any input file is loaded, so an invalid
        invocation is reported through ``self.error`` without reading the
        GO tree first.

        Depending on ``self.options.source_arch`` and ``self.options.both``,
        the work is delegated to ``_transfer_from_same_file``,
        ``_transfer_from_other_file`` or ``_transfer_from_both``.

        NOTE(review): the checks below rely on ``self.error`` not returning
        (e.g. by exiting or raising) -- confirm against its definition.
        """
        if len(self.args) != 3:
            # Keep the offending count visibly separated from the message
            self.error("exactly three input files are expected, got %d"
                       % len(self.args))

        if self.options.ev_codes not in self.evidence:
            self.error("The three valid types of evidence codes are: " +
                       "EXP, ALL_BUT_IEA, and ALL")

        rbp = self.options.results_by_protein
        self.options.results_by_protein = self.__parse_bool(rbp)
        if not self.options.results_by_protein\
           and not self.options.results_by_arch:
            self.error("Either results by protein or by arch should be set")

        if self.options.max_pvalue < 0.0 or self.options.max_pvalue > 1.0:
            self.error("The maximum p-value should be between 0.0 and 1.0")

        # Checked here, before the input files are read, so that a wrong
        # combination of options does not cost a full load of the GO tree
        source_arch = self.options.source_arch
        if not source_arch and self.options.both:
            self.error("A source architecture file "
                       "should be specified in 'both' mode")

        codes = set(self.evidence[self.options.ev_codes])

        go_file, arch_a, goa_b = self.args
        self.log.info("Loading GO tree from %s...", go_file)
        self.go_tree = GOTree.from_obo(go_file)
        goa = self.read_goa_file(goa_b, codes)

        if not source_arch:
            self._transfer_from_same_file(goa, arch_a)
        elif not self.options.both:
            self._transfer_from_other_file(goa, arch_a, source_arch)
        else:
            self._transfer_from_both(goa, arch_a, source_arch)
예제 #7
0
    def __init__(self, go_file, goa_file, output_directory, proteins):
        """Loads the GO tree, the GOA annotations and the protein list.

        ``go_file`` is passed to ``GOTree.from_obo`` and ``goa_file`` to
        ``self._read_goa_file``. ``output_directory`` is stored as
        ``self.output_dir`` before ``self._create_output_directory`` is
        called. ``proteins`` is the name of a text file; every line of it,
        stripped of surrounding whitespace, becomes a member of the
        ``self.proteins`` set.

        For every term that has both a ``namespace`` and an ``id`` tag, the
        ID is recorded in ``self.terms_per_ontology`` (namespace -> set of
        IDs) and in ``self.ontology_per_term`` (ID -> namespace). The number
        of IDs per namespace is printed to the standard output.
        """
        self.go_tree = GOTree.from_obo(go_file)
        self.terms_per_ontology = defaultdict(set)
        self.ontology_per_term = {}
        for _goterm, term in self.go_tree.terms.items():
            try:
                namespace = str(term.tags["namespace"][0])
                goterm_id = str(term.tags["id"][0])
            except KeyError:
                # this is done to avoid reading the Typedef entries
                continue
            self.terms_per_ontology[namespace].add(goterm_id)
            self.ontology_per_term[goterm_id] = namespace

        for ontology, terms in self.terms_per_ontology.items():
            # A single pre-formatted string prints the same text under both
            # Python 2 and Python 3, unlike the print statement
            print("%s   %s" % (ontology, len(terms)))

        self.goa = self._read_goa_file(goa_file)
        self.output_dir = output_directory
        self._create_output_directory()
        self.proteins = set()
        with open(proteins, "r") as fin:
            for line in fin:
                self.proteins.add(line.strip())
예제 #8
0
    def __init__(self, go_file, goa_file, output_directory, proteins):
        """Loads the GO tree, the GOA annotations and the protein list.

        ``go_file`` is passed to ``GOTree.from_obo`` and ``goa_file`` to
        ``self._read_goa_file``. ``output_directory`` is stored as
        ``self.output_dir`` before ``self._create_output_directory`` is
        called. ``proteins`` is the name of a text file; every line of it,
        stripped of surrounding whitespace, becomes a member of the
        ``self.proteins`` set.

        For every term that has both a ``namespace`` and an ``id`` tag, the
        ID is recorded in ``self.terms_per_ontology`` (namespace -> set of
        IDs) and in ``self.ontology_per_term`` (ID -> namespace). The number
        of IDs per namespace is printed to the standard output.
        """
        self.go_tree = GOTree.from_obo(go_file)
        self.terms_per_ontology = defaultdict(set)
        self.ontology_per_term = {}
        for _key, entry in self.go_tree.terms.items():
            try:
                onto_name = str(entry.tags["namespace"][0])
                term_id = str(entry.tags["id"][0])
            except KeyError:
                # entries lacking these tags (the Typedef ones) are skipped
                continue
            self.terms_per_ontology[onto_name].add(term_id)
            self.ontology_per_term[term_id] = onto_name

        for onto_name, members in self.terms_per_ontology.items():
            summary = "{} {}".format(onto_name, len(members))
            print(summary)

        self.goa = self._read_goa_file(goa_file)
        self.output_dir = output_directory
        self._create_output_directory()
        self.proteins = set()
        with open(proteins, "r") as handle:
            self.proteins.update(row.strip() for row in handle)