def load_objects(self, is_bedgraph, verbose=False, test=False):
    """Read every "regions" or "genes" file and store the result in ``self.objectsDict``.

    Entries of any other type are skipped; a notice is printed for them only
    when ``verbose`` is set.

    *Keyword arguments:*

        - is_bedgraph -- Whether region files are in bedgraph format (required, no default).
        - verbose -- Print progress messages to stderr (default = False).
        - test -- Keep only the first 11 regions (slice ``[0:11]``) of each BED file (default = False).
    """
    loadable = ("regions", "genes")
    for idx, kind in enumerate(self.types):
        if verbose:
            print("Loading file ", self.files[self.names[idx]], file=sys.stderr)
            if kind not in loadable:
                print("Cannot load objects", file=sys.stderr)
        if kind not in loadable:
            continue

        label = self.names[idx]
        if kind == "regions":
            loaded = GenomicRegionSet(label)
            # Readers always receive an absolute path, never the relative one.
            source = os.path.abspath(self.files[label])
            if is_bedgraph:
                loaded.read_bedgraph(source)
            else:
                loaded.read_bed(source)
                if test:
                    loaded.sequences = loaded.sequences[0:11]
        else:
            loaded = GeneSet(label)
            # Readers always receive an absolute path, never the relative one.
            loaded.read(os.path.abspath(self.files[label]))
        self.objectsDict[label] = loaded
def load_objects(self, is_bedgraph, verbose=False, test=False):
    """Read every "regions" or "genes" file and store the result in ``self.objectsDict``.

    Entries of any other type are skipped; a notice is printed for them only
    when ``verbose`` is set.

    *Keyword arguments:*

        - is_bedgraph -- Whether region files are in bedgraph format (required, no default).
        - verbose -- Print progress messages to stderr (default = False).
        - test -- For BED input, keep only the first 11 regions (slice ``[0:11]``);
                  ignored for bedgraph input (default = False).
    """
    for pos, entry_type in enumerate(self.types):
        if verbose:
            print("Loading file ", self.files[self.names[pos]], file=sys.stderr)
            if entry_type not in ("regions", "genes"):
                print("Cannot load objects", file=sys.stderr)

        if entry_type == "regions":
            key = self.names[pos]
            region_set = GenomicRegionSet(key)
            if is_bedgraph:
                region_set.read_bedgraph(os.path.abspath(self.files[key]))
            elif test:
                # Test mode: read into a scratch set and copy over only the
                # leading slice of its sequences.
                scratch = GenomicRegionSet(key)
                scratch.read_bed(os.path.abspath(self.files[key]))
                region_set.sequences = scratch.sequences[0:11]
            else:
                # The reader receives an absolute path, not the relative one.
                region_set.read_bed(os.path.abspath(self.files[key]))
            self.objectsDict[key] = region_set
        elif entry_type == "genes":
            key = self.names[pos]
            gene_set = GeneSet(key)
            # The reader receives an absolute path, not the relative one.
            gene_set.read(os.path.abspath(self.files[key]))
            self.objectsDict[key] = gene_set
def match_ms_tags(self, field, test=False):
    """Add more entries to match the missing tags of the given field.

    Entries listed under the "ALL" tag of ``self.fieldsDict[field]`` have no
    specific tag for that field. For example, reads may carry tags such as
    'cell_A' and 'cell_B' while regions carry none. Each such entry is
    repeated once per other tag under the name ``<name>_<tag>`` so that it
    matches all tagged entries, and the original name is appended to
    ``self.trash``. Nothing happens when the field has no "ALL" tag.

    *Keyword arguments:*

        - field -- Field to add extra entries.
        - test -- Keep only the first 11 regions (slice ``[0:11]``) of every
                  repeated region set (default = False).
    """
    # Snapshot the tags into a list: on Python 3 ``dict.keys()`` is a view
    # without ``remove``, and the inner dicts are modified further down.
    tags = list(self.fieldsDict[field].keys())
    if "ALL" not in tags:
        return
    tags.remove("ALL")

    for name in self.fieldsDict[field]["ALL"]:
        i = self.names.index(name)
        for t in tags:
            n = name + "_" + t
            entry_type = self.types[i]

            # Register the replica with the same type and file as the original.
            self.names.append(n)
            self.types.append(entry_type)
            self.files[n] = self.files[name]

            # The first three fields are skipped; every later field gets the
            # replica: under tag ``t`` for the matched field, and under the
            # original entry's own tag for any other field.
            for f in self.fields[3:]:
                if f == field:
                    tag = t
                else:
                    tag = self.get_type(name=name, field=f)
                self.fieldsDict[f].setdefault(tag, []).append(n)

            if entry_type == "regions":
                g = GenomicRegionSet(n)
                g.read_bed(self.files[name])
                if test:
                    g.sequences = g.sequences[0:11]
                self.objectsDict[n] = g

            # Appended once per tag, so ``self.trash`` may hold duplicates
            # of ``name``.
            self.trash.append(name)