def __init__(self, source=None, skip_remap=False, verbose=True):
    """
    Constructor can "open" ( i ) a list of lists, ( ii ) a tab-delimited
    file path, or ( iii ) STDIN when *source* is None.

    source     : list of lists | str path | None ( read STDIN )
    skip_remap : when True, skip building the initial index
    verbose    : toggles report( ) output
    """
    reader_opts = dict(delimiter="\t", quotechar="", quoting=csv.QUOTE_NONE)
    # establish the table from: ( i ) list of lists or ( ii ) file ( handle )
    if isinstance(source, list):
        # copy each row so the caller's lists are not aliased by the table
        self.data = [aRow[:] for aRow in source]
        self.source = "<list of lists>"
    elif isinstance(source, str):
        # we opened this handle, so we are responsible for closing it
        with try_open(source) as fh:
            self.data = list(csv.reader(fh, **reader_opts))
        self.source = source
    else:
        # BUGFIX: do NOT wrap sys.stdin in a with-block — its __exit__
        # would close the process-wide stdin handle for later readers
        self.data = list(csv.reader(sys.stdin, **reader_opts))
        self.source = "<stdin>" if source is None else source
    # track transposition status
    self.istransposed = False
    # track verbosity status (used by the reporter)
    self.isverbose = verbose
    # attempt to set up the initial index for the table
    if not skip_remap:
        self.remap()
        self.report("new table with size", self.size())
    else:
        self.report("WARNING: table loaded in unmapped mode")
def read_fasta_bp(path, full_headers=False):
    """
    Read a FASTA file with Biopython's SeqIO parser.

    path         : input path ( handed to try_open )
    full_headers : when True, append the full description to the record name
    Returns an OrderedDict mapping header -> upper-cased sequence string.
    """
    fdict = OrderedDict()
    # BUGFIX: the old "rU" open mode was removed in Python 3.11; universal
    # newline handling is the default for text mode, so plain open suffices
    with try_open(path) as fh:
        for record in SeqIO.parse(fh, "fasta"):
            # drop trailing "|" separators from the record id
            header = record.name.rstrip("|")
            if full_headers:
                header = " ".join([header, record.description])
            fdict[header] = str(record.seq).upper()
    return fdict
def write(self, path=None):
    """
    Write the table as TSV ( excel-tab dialect ) to *path*, or to STDOUT
    when *path* is None. Emits one header row ( origin + colheads ), then
    one row per rowhead prefixed with that rowhead.
    """
    fh = sys.stdout if path is None else zu.try_open(path, "w")
    try:
        W = csv.writer(fh, csv.excel_tab)
        W.writerow([self.origin] + self.colheads)
        for r, row in zip(self.rowheads, self.data):
            W.writerow([r] + row)
    finally:
        # BUGFIX: close in a finally so an exception while writing cannot
        # leak the handle; never close sys.stdout
        if path is not None:
            fh.close()
def dump(self, output_file=None):
    """
    Print the table to *output_file* ( or STDOUT when None ) as raw TSV,
    with no quoting.
    """
    opened_here = output_file is not None
    fh = try_open(output_file, "w") if opened_here else sys.stdout
    dumper = csv.writer(fh, delimiter="\t", quotechar="", quoting=csv.QUOTE_NONE)
    try:
        for row in self.data:
            dumper.writerow(row)
    finally:
        # BUGFIX: the old code called fh.close( ) unconditionally, which
        # closed sys.stdout when no output file was given
        if opened_here:
            fh.close()
def read_fasta(path, full_headers=False):
    """
    Minimal hand-rolled FASTA reader.

    path         : input path ( handed to try_open )
    full_headers : when True, keep the whole header line; otherwise keep
                   only the first whitespace-delimited token, minus any
                   trailing "|" separators
    Returns an OrderedDict mapping header -> upper-cased sequence; multi-line
    sequences ( and duplicate headers ) are concatenated.
    """
    fdict = OrderedDict()
    with try_open(path) as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # BUGFIX: blank lines used to raise IndexError on line[0]
                continue
            if line.startswith(">"):
                header = line[1:]
                if not full_headers:
                    header = header.split()[0].rstrip("|")
            else:
                fdict[header] = fdict.get(header, "") + line.upper()
    return fdict
def write(self, path=None, gzip=False):
    """
    Write the table as TSV to *path*, or to STDOUT when *path* is None.

    path : output path ( None -> STDOUT )
    gzip : when True and a path is given, gzip-compress the output
    """
    def _emit(fh):
        # one header row ( origin + column heads ), then one row per rowhead
        writer = csv.writer(fh, delimiter="\t", quotechar="", quoting=csv.QUOTE_NONE)
        writer.writerow([self.origin] + list(self.colheads))
        for rowhead, row in zip(self.rowheads, self.data):
            writer.writerow([rowhead] + list(row))

    if path is None:
        # BUGFIX: never wrap sys.stdout in ``with`` — that would close it
        _emit(sys.stdout)
    elif gzip:
        # implements the "**** implement gzipped writing ****" TODO from
        # the old docstring; alias avoids the shadowing ``gzip`` parameter
        import gzip as _gzip
        with _gzip.open(path, "wt") as fh:
            _emit(fh)
    else:
        # BUGFIX: the old code opened with try_open(path) ( no mode );
        # writing needs "w", matching the other writers in this file
        with try_open(path, "w") as fh:
            _emit(fh)
def main():
    """Read TSV rows from --stream ( or STDIN ) and hand them to excel( )."""
    args = get_args()
    source = try_open(args.stream) if args.stream is not None else sys.stdin
    rows = list(csv.reader(source, csv.excel_tab))
    excel(
        rows,
        trim=args.trim,
        limit=args.limit,
        norm=args.norm,
        align_right=args.align_right,
        itunes=args.itunes,
    )
def write_fasta(fdict, path=None, wrap=None, sort=False):
    """
    Write a {header: sequence} mapping in FASTA format.

    fdict : mapping of header -> sequence string
    path  : output path ( None -> STDOUT )
    wrap  : max characters per sequence line ( None -> one line per record )
    sort  : when True, emit records in sorted-header order
    """
    opened_here = path is not None
    fh = try_open(path, "w") if opened_here else sys.stdout
    try:
        order = sorted(fdict) if sort else fdict.keys()
        for header in order:
            seq = fdict[header]
            # guarantee the record starts with the FASTA ">" marker
            if header[0] != ">":
                header = ">" + header
            print(header, file=fh)
            if wrap is None:
                print(seq, file=fh)
            else:
                # emit fixed-width slices, then any remainder
                while len(seq) > wrap:
                    print(seq[0:wrap], file=fh)
                    seq = seq[wrap:]
                if len(seq) > 0:
                    print(seq, file=fh)
    finally:
        # BUGFIX: only close handles we opened; the old code closed STDOUT
        if opened_here:
            fh.close()
    return None
def load_from_file(self, path):
    """Open *path* and delegate parsing to load_from_file_handle( )."""
    handle = zu.try_open(path)
    self.load_from_file_handle(handle)
# NOTE(review): this chunk begins mid-way through a parser.add_argument( )
# call whose opening line is outside this view, and ends at a dangling
# ``else:`` that continues past the view
                    action="store_true", help="print file2 lines that did not match")
parser.add_argument("--het", action="store_true",
                    help="allow file 2 lines to have unequal lengths")
args = parser.parse_args()
# adjust to base-0 indexing for python
args.key1 -= 1
args.key2 -= 1
# load second file to dictionary: join-key -> { full tab-joined line: 1 }
lengths2 = []
d = {}
headers2 = None
with try_open(args.file2) as fh:
    for items in csv.reader(fh, dialect="excel-tab"):
        # NOTE(review): the header line's length is appended BEFORE the
        # header check below, so it participates in the unequal-length
        # warning — confirm this is intended
        lengths2.append(len(items))
        if headers2 is None and args.head2:
            # first line is a header: remember it and skip indexing it
            headers2 = c_sep.join(items)
            continue
        key = items[args.key2]
        d.setdefault(key, {})["\t".join(items)] = 1
print("finished loading file2", file=sys.stderr)
# make dummy line to add when join fails
if len(set(lengths2)) != 1:
    warn("file2 lines have unequal lengths")
    # presumably --het accepts ragged rows with a single-NA dummy; the
    # else branch is outside this view — TODO confirm nesting
    if args.het:
        dummyline2 = c_na
    else:
# collect candidate files from every searched directory
for directory in aDirectories:
    aMatchingFiles += glob.glob(
        os.path.join(subroot, directory, args.file_pattern))
# skip symlinks so the same file is not visited twice
aMatchingFiles = [f for f in aMatchingFiles if not os.path.islink(f)]
# ---------------------------------------------------------------
# find
# ---------------------------------------------------------------
"""find the files that match the file pattern; if we're not in swap mode,
then print them ( and matching lines ) to the screen; else save them for
find-replace in the next part"""
aMatchingFiles2 = []
for path in aMatchingFiles:
    already_hit = False
    # ``with`` replaces the old fh = try_open( ) ... fh.close( ) pair
    with try_open(path) as fh:
        for i, line in enumerate(fh):
            line = line.rstrip("\n")
            if re.search(args.text_pattern, line):
                if not already_hit:
                    aMatchingFiles2.append(path)
                    already_hit = True
                    if args.swap_pattern is None:
                        # BUGFIX: Python 2 ``print path`` statement is a
                        # syntax error in Python 3 ( the rest of this code
                        # uses print( ) calls ) — converted with identical
                        # output
                        print(path)
                if args.swap_pattern is None:
                    # BUGFIX: same Python 2 -> 3 print conversion; the
                    # default " " separator reproduces the old comma output
                    print("\t", "%5d" % (i + 1), line)
# now, matching files stores files that match the file pattern AND hit the text pattern
aMatchingFiles = aMatchingFiles2
# ---------------------------------------------------------------
def read_csv(path, headers=False):
    """
    Shared file reader: open *path* and return a lazy tab-delimited
    csv reader over it. When *headers* is True, the first line is
    consumed and discarded before the reader is handed back.
    """
    handle = try_open(path)
    if headers:
        # drop the header line ( readline on an empty file is a no-op )
        handle.readline()
    return csv.reader(handle, dialect="excel-tab")
def load_from_path(path):
    """Open *path*, parse it via load_from_handle( ), and close the file."""
    handle = try_open(path)
    try:
        return load_from_handle(handle)
    finally:
        handle.close()