Example #1
 def __init__(self, source=None, skip_remap=False, verbose=True):
     """ Constructor can "open" ( i ) list of lists ( ii ) tab delimitted file/STDIN """
     # establish the table from: ( i ) list of lists or ( ii ) file ( handle )
     if isinstance(source, list):
         self.data = [aRow[:] for aRow in source]
         self.source = "<list of lists>"
     else:
         with try_open(source) if isinstance(source,
                                             str) else sys.stdin as fh:
             self.data = [
                 row for row in csv.reader(fh,
                                           delimiter="\t",
                                           quotechar="",
                                           quoting=csv.QUOTE_NONE)
             ]
             self.source = source if source is not None else "<stdin>"
     # track transposition status
     self.istransposed = False
     # track verbosity status (used by the reporter)
     self.isverbose = verbose
     # attempt to set up the initial index for table
     if not skip_remap:
         self.remap()
         self.report("new table with size", self.size())
     else:
         self.report("WARNING: table loaded in unmapped mode")
Example #2
def read_fasta_bp(path, full_headers=False):
    fdict = OrderedDict()
    with try_open(path, "rU") as fh:
        for record in SeqIO.parse(fh, "fasta"):
            header = record.name.rstrip("|")
            if full_headers:
                header = " ".join([header, record.description])
            fdict[header] = str(record.seq).upper()
    return fdict
Example #3
 def write(self, path=None):
     fh = sys.stdout
     if path is not None:
         fh = zu.try_open(path, "w")
     W = csv.writer(fh, csv.excel_tab)
     W.writerow([self.origin] + self.colheads)
     for r, row in zip(self.rowheads, self.data):
         W.writerow([r] + row)
     if path is not None:
         fh.close()
Example #4
 def dump(self, output_file=None):
     """ Print the table to a file """
     fh = try_open(output_file,
                   "w") if output_file is not None else sys.stdout
     dumper = csv.writer(fh,
                         delimiter="\t",
                         quotechar="",
                         quoting=csv.QUOTE_NONE)
     for row in self.data:
         dumper.writerow(row)
     # only close handles we opened ourselves; leave sys.stdout open
     if output_file is not None:
         fh.close()
Example #5
def read_fasta(path, full_headers=False):
    fdict = OrderedDict()
    with try_open(path) as fh:
        for line in fh:
            line = line.strip()
            # skip blank lines so indexing the first character is safe
            if not line:
                continue
            if line[0] == ">":
                header = line[1:]
                if not full_headers:
                    header = header.split()[0].rstrip("|")
            else:
                fdict[header] = fdict.get(header, "") + line.upper()
    return fdict
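
A minimal usage sketch for the plain-Python reader above (the file name is just a placeholder): it returns an OrderedDict mapping each header to its upper-cased sequence, preserving file order.

# "example.fasta" is a hypothetical input file
seqs = read_fasta("example.fasta")
for header, seq in seqs.items():
    print(header, len(seq), sep="\t")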
Example #6
 def write(self, path=None, gzip=False):
     """ 
     **** implement gzipped writing ****
     """
     with (try_open(path) if path is not None else sys.stdout) as fh:
         writer = csv.writer(fh,
                             delimiter="\t",
                             quotechar="",
                             quoting=csv.QUOTE_NONE)
         writer.writerow([self.origin] + list(self.colheads))
         for rowhead, row in zip(self.rowheads, self.data):
             writer.writerow([rowhead] + list(row))
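
The gzip flag above is still a TODO; one way to honour it, sketched here as an assumption rather than the project's actual approach, is to route the output through gzip.open in text mode when the flag is set:

import gzip

def open_for_write(path, use_gzip=False):
    # hypothetical helper: plain or gzipped text handle for a write( ) method like the one above
    if use_gzip:
        return gzip.open(path if path.endswith(".gz") else path + ".gz", "wt")
    return open(path, "w")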
Example #7
def main( ):
    args = get_args( )
    stream = sys.stdin
    if args.stream is not None:
        stream = try_open( args.stream )
    data = [row for row in csv.reader( stream, csv.excel_tab )]
    excel( data,
           trim=args.trim,
           limit=args.limit,
           norm=args.norm,
           align_right=args.align_right,
           itunes=args.itunes,
           )
Example #8
def write_fasta(fdict, path=None, wrap=None, sort=False):
    fh = sys.stdout
    if path is not None:
        fh = try_open(path, "w")
    order = sorted(fdict) if sort else fdict.keys()
    for header in order:
        seq = fdict[header]
        if header[0] != ">":
            header = ">" + header
        print(header, file=fh)
        if wrap is None:
            print(seq, file=fh)
        else:
            while len(seq) > wrap:
                print(seq[0:wrap], file=fh)
                seq = seq[wrap:]
            if len(seq) > 0:
                print(seq, file=fh)
    # only close handles we opened ourselves; leave sys.stdout open
    if path is not None:
        fh.close()
    return None
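
Combining the reader from Example #5 with this writer gives a simple round trip; the sketch below (file names are placeholders) sorts the headers and re-wraps sequences at 60 characters per line:

seqs = read_fasta("input.fasta")
# write back out with sorted headers and 60-character line wrapping
write_fasta(seqs, path="rewrapped.fasta", wrap=60, sort=True)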
Example #9
 def load_from_file(self, path):
     self.load_from_file_handle(zu.try_open(path))
Example #10
                    action="store_true",
                    help="print file2 lines that did not match")
parser.add_argument("--het",
                    action="store_true",
                    help="allow file 2 lines to have unequal lengths")
args = parser.parse_args()

# adjust to base-0 indexing for python
args.key1 -= 1
args.key2 -= 1

# load second file to dictionary
lengths2 = []
d = {}
headers2 = None
with try_open(args.file2) as fh:
    for items in csv.reader(fh, dialect="excel-tab"):
        lengths2.append(len(items))
        if headers2 is None and args.head2:
            headers2 = c_sep.join(items)
            continue
        key = items[args.key2]
        d.setdefault(key, {})["\t".join(items)] = 1
print("finished loading file2", file=sys.stderr)

# make dummy line to add when join fails
if len(set(lengths2)) != 1:
    warn("file2 lines have unequal lengths")
    if args.het:
        dummyline2 = c_na
    else:
Example #11
    for directory in aDirectories:
        aMatchingFiles += glob.glob(
            os.path.join(subroot, directory, args.file_pattern))
aMatchingFiles = [f for f in aMatchingFiles if not os.path.islink(f)]

# ---------------------------------------------------------------
# find
# ---------------------------------------------------------------
"""find the files that match the file pattern; if we're not in swap mode, then
print them ( and matching lines ) to the screen; else save them for find-replace
in the next part"""

aMatchingFiles2 = []
for path in aMatchingFiles:
    already_hit = False
    fh = try_open(path)
    for i, line in enumerate(fh):
        line = line.rstrip("\n")
        if re.search(args.text_pattern, line):
            if not already_hit:
                aMatchingFiles2.append(path)
                already_hit = True
                if args.swap_pattern is None:
                    print(path)
            if args.swap_pattern is None:
                print "\t", "%5d" % (i + 1), line
    fh.close()
# aMatchingFiles now holds the files that match the file pattern AND contain the text pattern
aMatchingFiles = aMatchingFiles2

# ---------------------------------------------------------------
Example #12
def read_csv(path, headers=False):
    """ shared file reader """
    fh = try_open(path)
    if headers:
        fh.readline()
    return csv.reader(fh, dialect="excel-tab")
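
Because read_csv hands back a live csv.reader over an open handle, callers can stream large tables row by row; a hypothetical usage (the file name is made up):

for row in read_csv("table.tsv", headers=True):
    # each row is a list of fields from one tab-delimited line
    key, values = row[0], row[1:]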
Example #13
def load_from_path(path):
    with try_open(path) as fh:
        return load_from_handle(fh)