def loadGTF(line, fp, fname, labels, regions, transcriptID, transcript_id_designator, defaultGroup):
    """
    Like loadBED, but for a GTF file

    This is largely a copy of what's in deeptoolsintervals

    line:     the first record of the file, already read by the caller (str)
    fp:       iterable yielding the remaining lines, as str or bytes
    fname:    file name; its basename (made unique against ``labels``) is
              handed to loadGTFtranscript as the file-level label
    labels:   dict mapping group label -> index into ``regions``; updated in place
    regions:  list of dicts (one per label) mapping transcript name -> order of
              insertion within that group; updated in place
    transcriptID: feature type (3rd GTF column) of the records to keep,
              compared case-insensitively
    transcript_id_designator, defaultGroup: forwarded unchanged to
              loadGTFtranscript (presumably the attribute key holding the
              transcript name and a fallback group label -- confirm there)

    Nothing is returned.
    """
    file_label = dti.findRandomLabel(labels, os.path.basename(fname))

    # Lower-case once so that the first line and all following lines are
    # matched with the same, case-insensitive rule.
    wantedFeature = transcriptID.lower()

    def addTranscript(cols):
        # Register one matching record under its label, creating the label
        # (and its per-label dict) the first time it is seen.
        label, name = loadGTFtranscript(cols, file_label, defaultGroup, transcript_id_designator)
        if label is None:
            return
        if label not in labels:
            labels[label] = len(labels)
            regions.append(dict())
        group = regions[labels[label]]
        group[name] = len(group)

    # handle the first line
    # NOTE(review): unlike the loop below, the first line is split without
    # strip(); kept as-is because the caller may already have cleaned it.
    cols = line.split("\t")
    if len(cols) >= 3 and cols[2].lower() == wantedFeature:
        addTranscript(cols)

    for line in fp:
        if not isinstance(line, str):
            line = line.decode('ascii')
        if line.startswith('#'):
            continue
        cols = line.strip().split('\t')
        # str.split never returns an empty list, so blank or truncated lines
        # have to be detected by the missing feature column instead.
        if len(cols) < 3:
            continue
        if cols[2].lower() == wantedFeature:
            addTranscript(cols)
def loadBED(line, fp, fname, labelColumn, labels, regions, defaultGroup):
    """
    Given a first line, possibly a label column and the labels and regions
    collected so far, add the labels and regions in the file to them

    line:         the first record of the file, already read by the caller (str)
    fp:           iterable yielding the remaining lines, as str or bytes
    fname:        file name; its basename is the fallback group label
    labelColumn:  index of the column holding each record's group label, or
                  None if groups are delimited by '#' lines instead
    labels:       dict mapping group label -> index into ``regions``; updated in place
    regions:      list of dicts (one per label) mapping region name -> order of
                  insertion within that group; updated in place
    defaultGroup: label for the trailing unlabeled group, or None to fall back
                  to the file's basename

    Nothing is returned.
    """
    # This is largely parseBED from deeptoolsintervals
    labelIdx = None
    localRegions = {}

    cols = line.strip().split("\t")
    if labelColumn is not None:
        label = cols.pop(labelColumn)
        if label not in labels:
            labels[label] = len(labels)
        labelIdx = labels[label]
        if labelIdx >= len(regions):
            regions.append(localRegions)
        else:
            localRegions = regions[labelIdx]

    # NOTE(review): the first record's name is not passed through
    # dti.findRandomLabel, unlike the records in the loop below -- confirm
    # whether a clash with an already-populated group is possible here.
    if len(cols) >= 6:
        name = cols[3]
    else:
        name = "{0}:{1}-{2}".format(cols[0], cols[1], cols[2])
    localRegions[name] = len(localRegions)

    for line in fp:
        # fp may yield bytes (e.g. a handle opened in binary mode); decode
        # the same way loadGTF does so the str operations below work.
        if not isinstance(line, str):
            line = line.decode('ascii')

        if line.startswith("#"):
            # Without a label column a '#' line closes the current group and
            # names it; with a label column '#' lines are plain comments.
            if labelColumn is None and len(localRegions) > 0:
                label = line[1:].strip()
                if label:
                    labels[dti.findRandomLabel(labels, label)] = len(labels)
                else:
                    labels[dti.findRandomLabel(labels, os.path.basename(fname))] = len(labels)
                regions.append(localRegions)
                localRegions = dict()
            continue

        cols = line.strip().split("\t")
        if len(cols) < 3:
            continue
        if labelColumn is not None:
            label = cols.pop(labelColumn)
            if label not in labels:
                labels[label] = len(labels)
            labelIdx = labels[label]
            if labelIdx >= len(regions):
                regions.append({})
            localRegions = regions[labelIdx]

        if len(cols) >= 6:
            name = cols[3]
        else:
            name = "{0}:{1}-{2}".format(cols[0], cols[1], cols[2])
        name = dti.findRandomLabel(localRegions, name)
        localRegions[name] = len(localRegions)

    # Handle the last group if there is no label
    if labelIdx is None and len(localRegions) > 0:
        if defaultGroup is not None:
            labels[dti.findRandomLabel(labels, defaultGroup)] = len(labels)
        else:
            labels[dti.findRandomLabel(labels, os.path.basename(fname))] = len(labels)
        regions.append(localRegions)
def loadBED(line, fp, fname, labelColumn, labels, regions, defaultGroup):
    """
    Given a first line, possibly a label column and the labels and regions
    gathered so far, add the labels and regions in the file to them

    line:         first record of the file, already consumed by the caller (str)
    fp:           iterable over the remaining lines (str or bytes)
    fname:        name of the file; the basename serves as a fallback label
    labelColumn:  column index of the per-record group label, or None when
                  groups are separated by '#' lines
    labels:       dict of group label -> position in ``regions`` (modified in place)
    regions:      list with one dict per label, each mapping a region name to
                  its order of insertion in that group (modified in place)
    defaultGroup: label given to the final unlabeled group; None means use
                  the basename of ``fname``

    Returns None.
    """
    # NOTE(review): this file holds another definition of loadBED directly
    # before this one; being the later of the two, this is the definition
    # that ends up bound to the name. Consider removing the duplicate.

    # This is largely parseBED from deeptoolsintervals
    labelIdx = None
    localRegions = {}

    cols = line.strip().split("\t")
    if labelColumn is not None:
        label = cols.pop(labelColumn)
        if label not in labels:
            labels[label] = len(labels)
        labelIdx = labels[label]
        if labelIdx >= len(regions):
            regions.append(localRegions)
        else:
            localRegions = regions[labelIdx]

    # BED6-or-wider records carry a name in column 4; otherwise build one
    # from the coordinates.
    # NOTE(review): this first name is stored without dti.findRandomLabel,
    # whereas later records are de-duplicated -- verify this is intended.
    if len(cols) >= 6:
        name = cols[3]
    else:
        name = "{0}:{1}-{2}".format(cols[0], cols[1], cols[2])
    localRegions[name] = len(localRegions)

    for line in fp:
        # Accept bytes lines as loadGTF does; without this the str-based
        # startswith() check below raises TypeError on a binary handle.
        if not isinstance(line, str):
            line = line.decode('ascii')

        if line.startswith("#"):
            if labelColumn is None and len(localRegions) > 0:
                # A '#' line ends the current group; its text (or, when
                # empty, the file's basename) becomes the group label.
                groupLabel = line[1:].strip() or os.path.basename(fname)
                labels[dti.findRandomLabel(labels, groupLabel)] = len(labels)
                regions.append(localRegions)
                localRegions = dict()
            continue

        cols = line.strip().split("\t")
        if len(cols) < 3:
            continue
        if labelColumn is not None:
            label = cols.pop(labelColumn)
            if label not in labels:
                labels[label] = len(labels)
            labelIdx = labels[label]
            if labelIdx >= len(regions):
                regions.append({})
            localRegions = regions[labelIdx]

        if len(cols) >= 6:
            name = cols[3]
        else:
            name = "{0}:{1}-{2}".format(cols[0], cols[1], cols[2])
        name = dti.findRandomLabel(localRegions, name)
        localRegions[name] = len(localRegions)

    # Handle the last group if there is no label
    if labelIdx is None and len(localRegions) > 0:
        lastLabel = defaultGroup if defaultGroup is not None else os.path.basename(fname)
        labels[dti.findRandomLabel(labels, lastLabel)] = len(labels)
        regions.append(localRegions)