Ejemplo n.º 1
0
    def load_objects(self, is_bedgraph, verbose=False, test=False):
        """Read every listed file and store the resulting object by name.

        Walks ``self.types`` in order; entry ``i`` is loaded from
        ``self.files[self.names[i]]`` (converted to an absolute path) and the
        loaded object is stored in ``self.objectsDict[self.names[i]]``.
        Entries whose type is neither "regions" nor "genes" are not loaded.

        *Keyword arguments:*

            - is_bedgraph -- If true, "regions" files are read with
              ``read_bedgraph``; otherwise with ``read_bed``.
            - verbose -- Report each file (and each unloadable entry) on
              stderr (default = False).
            - test -- For BED input only, keep just the first 11 entries of
              ``sequences`` (slice ``[0:11]``); ignored for bedgraph input
              (default = False).
        """
        for index, kind in enumerate(self.types):
            if verbose:
                print("Loading file ",
                      self.files[self.names[index]],
                      file=sys.stderr)
                if kind not in ("regions", "genes"):
                    print("Cannot load objects", file=sys.stderr)

            if kind == "regions":
                label = self.names[index]
                region_set = GenomicRegionSet(label)
                # Choose the reader first; the path is resolved at call time.
                reader = (region_set.read_bedgraph if is_bedgraph
                          else region_set.read_bed)
                reader(os.path.abspath(self.files[label]))
                # Truncation applies only to the BED branch.
                if test and not is_bedgraph:
                    region_set.sequences = region_set.sequences[:11]
                self.objectsDict[label] = region_set

            elif kind == "genes":
                label = self.names[index]
                gene_set = GeneSet(label)
                # The stored path may be relative; read via its absolute form.
                gene_set.read(os.path.abspath(self.files[label]))
                self.objectsDict[label] = gene_set
Ejemplo n.º 2
0
    def load_objects(self, is_bedgraph, verbose=False, test=False):
        """Read every listed file and store the resulting object by name.

        Walks ``self.types`` in order; entry ``i`` is loaded from
        ``self.files[self.names[i]]`` (converted to an absolute path) and the
        loaded object is stored in ``self.objectsDict[self.names[i]]``.
        Entries whose type is neither "regions" nor "genes" are not loaded.

        *Keyword arguments:*

            - is_bedgraph -- If true, "regions" files are read with
              ``read_bedgraph``; otherwise with ``read_bed``.
            - verbose -- Report each file (and each unloadable entry) on
              stderr (default = False).
            - test -- For BED input only: the file is read into a temporary
              set and only the first 11 entries of its ``sequences``
              (slice ``[0:11]``) are copied into the stored set; ignored for
              bedgraph input (default = False).
        """
        for idx, obj_type in enumerate(self.types):
            if verbose:
                print("Loading file ", self.files[self.names[idx]], file=sys.stderr)

            if obj_type == "genes":
                entry = self.names[idx]
                gene_set = GeneSet(entry)
                # The stored path may be relative; read via its absolute form.
                gene_set.read(os.path.abspath(self.files[entry]))
                self.objectsDict[entry] = gene_set
                continue

            if obj_type != "regions":
                if verbose:
                    print("Cannot load objects", file=sys.stderr)
                continue

            entry = self.names[idx]
            regions = GenomicRegionSet(entry)
            if is_bedgraph:
                regions.read_bedgraph(os.path.abspath(self.files[entry]))
            elif test:
                # Load the whole file into a scratch set, then hand over only
                # the leading slice of its sequences to the stored set.
                scratch = GenomicRegionSet(entry)
                scratch.read_bed(os.path.abspath(self.files[entry]))
                regions.sequences = scratch.sequences[0:11]
            else:
                regions.read_bed(os.path.abspath(self.files[entry]))
            self.objectsDict[entry] = regions
Ejemplo n.º 3
0
    def match_ms_tags(self, field, test=False):
        """Expand entries tagged "ALL" for ``field`` into one entry per tag.

        For example, reads may carry tags such as 'cell_A' and 'cell_B' while
        regions carry none (they sit under "ALL"). Each "ALL" entry ``name``
        is then duplicated as ``name + "_" + tag`` for every other tag of
        ``field``, so that every tag has a matching entry.

        For each generated entry this method appends to ``self.names`` and
        ``self.types``, copies the file path in ``self.files``, registers the
        new name in ``self.fieldsDict`` for every field in
        ``self.fields[3:]``, and, when the source entry is of type "regions",
        reads the file into a new ``GenomicRegionSet`` stored in
        ``self.objectsDict``. The source name is appended to ``self.trash``.
        Nothing happens when ``field`` has no "ALL" tag.

        *Keyword arguments:*

            - field -- Field to add extra entries.
            - test -- Keep only the first 11 entries of ``sequences``
              (slice ``[0:11]``) of each newly read region set
              (default = False).
        """
        tags = self.fieldsDict[field]
        if "ALL" not in tags:
            return

        # Take a list snapshot of the concrete tags: dict views have no
        # ``remove`` in Python 3 and must not be iterated while the
        # dictionaries are being updated below.
        altypes = [t for t in tags if t != "ALL"]

        for name in tags["ALL"]:
            i = self.names.index(name)
            for t in altypes:
                n = name + "_" + t
                self.names.append(n)
                self.types.append(self.types[i])
                self.files[n] = self.files[name]

                for f in self.fields[3:]:
                    # Under the expanded field the copy gets the concrete
                    # tag; under every other field it inherits the tag of
                    # the entry it was copied from.
                    key = t if f == field else self.get_type(name=name,
                                                             field=f)
                    self.fieldsDict[f].setdefault(key, []).append(n)

                if self.types[i] == "regions":
                    g = GenomicRegionSet(n)
                    g.read_bed(self.files[name])
                    if test:
                        g.sequences = g.sequences[0:11]
                    self.objectsDict[n] = g
                # Appended once per generated entry, so ``name`` can appear
                # in ``self.trash`` several times.
                self.trash.append(name)