Beispiel #1
0
def read_stream(stream, stratified=False):
    """Collect the numeric values found in *stream*, grouped by key.

    Rows are obtained by iterating ``reader(stream)``.

    Args:
        stream: Object handed to ``reader``; presumably an open text
            handle of delimited data (confirm against ``reader``).
        stratified: When false, every field of every row is treated as a
            value and filed under the single key ``"#"``. When true, the
            first field of each row is the key and the remaining fields
            are its values.

    Returns:
        dict mapping each key to the list of floats parsed for it, in
        input order. Keys that ended up with no parseable value are
        omitted.

    Fields that cannot be converted with ``float`` are reported through
    ``warn`` and skipped; a summary with the bad-value percentage is
    emitted once at the end if any were seen.
    """
    total, bad = 0, 0
    data = {}
    for row in reader(stream):
        if stratified:
            key, values = row[0], row[1:]
        else:
            key, values = "#", row
        total += len(values)
        parsed = data.setdefault(key, [])
        for v in values:
            try:
                parsed.append(float(v))
            # Only conversion failures count as "bad values"; anything else
            # (KeyboardInterrupt, bugs in unrelated code) must propagate.
            except (TypeError, ValueError):
                warn("bad value:", v)
                bad += 1
    if bad > 0:
        # bad > 0 implies total > 0, so the division is safe.
        warn("bad values: %d (%.2f%%)" % (bad, 100 * bad / float(total)))
    # Drop keys whose every value was rejected.
    return {k: v for k, v in data.items() if v}
# ---------------------------------------------------------------
# load all data
# ---------------------------------------------------------------

# Accumulators filled while the input tables are loaded. Their exact layout
# is set by code past the end of this excerpt.
dictTableData = {}
# modified for faster looking up 4/2/2015
# (membership of a feature name is tested against this dict below)
dictFeatureIndex = {}

# Load each input file as a list of [key, value] string pairs, then apply
# the optional filters selected on the command line.
for iDex, strPath in enumerate(args.input):
    # progress message on stderr (Python 2 print-chevron syntax)
    print >> sys.stderr, "Loading", iDex + 1, "of", len(
        args.input), ":", strPath
    aastrData = []
    # default column heading comes from path2name(); it may be replaced by
    # the file's own header below
    strColhead = path2name(strPath)
    with open(strPath) as fh:
        for astrItems in reader(fh):
            # keep only the configured key and value columns
            # NOTE(review): the trailing comma after append(...) turns this
            # statement into a one-element tuple expression; harmless, but
            # it looks like a typo.
            aastrData.append(
                [astrItems[args.key_col], astrItems[args.val_col]]),
    if args.strip_comments:
        # drop rows whose key starts with "#"
        # NOTE(review): astrRow[0][0] raises IndexError if a key is the
        # empty string.
        aastrData = [astrRow for astrRow in aastrData if astrRow[0][0] != "#"]
    if args.use_headers:
        # take the column heading from the value field of the first row
        strColhead = aastrData[0][1]
    if args.strip_headers:
        # discard the first row
        aastrData = aastrData[1:]
    if args.key_pattern:
        # keep only rows whose key matches the regular expression
        aastrData = [
            astrRow for astrRow in aastrData
            if re.search(args.key_pattern, astrRow[0])
        ]
    # NOTE(review): the excerpt ends inside this loop; the body of the
    # `if` below is not visible here.
    for strFeature, strValue in aastrData:
        if strFeature not in dictFeatureIndex:
# extra columns whose values are converted to float before plotting
aFloatableFields = ["s", "alpha", "zorder"]

# command-line interface
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='')
parser.add_argument('-o', '--output', help='')
parser.add_argument('-n', '--logmin', type=float, help='')
parser.add_argument('-x', '--logmax', type=float, help='')
args = parser.parse_args()

# Read the table: the first line holds tab-separated headers, columns 0 and 1
# of every later row are the x and y coordinates, and each further column is
# collected into a list keyed by that column's header.
aX, aY = [], []
kwargs = {}
with open(args.input) as fh:
    aHeaders = fh.readline().strip().split("\t")
    for aItems in reader(fh):
        aX.append(float(aItems[0]))
        aY.append(float(aItems[1]))
        # the range is empty when the row has only the two coordinates
        for iCol in range(2, len(aItems)):
            strHeader = aHeaders[iCol]
            cell = aItems[iCol]
            if strHeader in aFloatableFields:
                cell = float(cell)
            kwargs.setdefault(strHeader, []).append(cell)

# draw on a single 5x5 inch axes
fig = plt.figure()
fig.set_size_inches(5, 5)
ax = plt.subplot(111)

# logmin/logmax are None when not given on the command line
# ( which ZillPlot expects )
zillplot(
    ax,
    aX,
    aY,
    logmin=args.logmin,
    logmax=args.logmax,
    **kwargs
)
Beispiel #4
0
def describe_median_rank ( m ):
    """Map a numeric rank *m* onto a descriptive label.

    The label is the name of the first bucket whose upper bound is
    strictly greater than *m*:

        m < 0.05 -> "very low"      m < 0.2  -> "low"
        m < 0.4  -> "medium low"    m < 0.6  -> "trivial"
        m < 0.8  -> "medium high"   m < 0.95 -> "high"
        otherwise -> "very high"

    Values at or above the last bound (e.g. exactly 1.0) are labelled
    "very high" rather than falling through with no label. A value that
    compares with nothing (NaN) yields None.

    Presumably *m* is a fractional rank in [0, 1] -- confirm with callers.
    """
    cuts = [0.05, 0.2, 0.4, 0.6, 0.8, 0.95, 1.0]
    names = ["very low", "low", "medium low", "trivial", "medium high", "high", "very high"]
    for cut, name in zip( cuts, names ):
        if m < cut:
            return name
    # The buckets are half-open, so m == 1.0 matches none of them above;
    # it still belongs in the top bucket.
    if m >= cuts[-1]:
        return names[-1]
    return None

# ---------------------------------------------------------------
# counting
# ---------------------------------------------------------------

# counter: 1-based position of the current row in the input
# kmer2term: kmer -> {original term: 1}   (inner dicts are used as sets)
# kmer2rank: kmer -> {row position: 1}    (inner dicts are used as sets)
counter = 0
kmer2term = {}
kmer2rank = {}
for items in reader( fh ):
    counter += 1
    term = items[0].strip()
    # remember the unmodified term; it is what gets recorded per kmer
    original = term
    if simplify:
        # Normalize the term itself: lowercase, turn every character outside
        # a-z0-9 into a space, then collapse runs of spaces.
        # (This previously lowercased an unrelated name, `line`, instead of
        # the term just read from the row.)
        term = term.lower()
        term = re.sub( r"[^a-z0-9]", " ", term )
        term = re.sub( r" +", " ", term )
    # either space-separated words, or every substring of length k
    kmers = term.split( " " ) if args.words else [term[j:j+k] for j in range( 0, len( term ) - k + 1 )]
    for kmer in kmers:
        temp = kmer2term.setdefault( kmer, {} )
        temp[original] = 1
        temp = kmer2rank.setdefault( kmer, {} )
        temp[counter] = 1

# ---------------------------------------------------------------
Beispiel #5
0
# total: rows read from stdin; bad: rows rejected as non-numeric/malformed
total = 0
bad = 0

parser = argparse.ArgumentParser()
parser.add_argument(
    '-z',
    "--zerozero",
    action="store_true",
    help="if set: 0,0 points EXCLUDED",
)
args = parser.parse_args()

# read in data
# Every accepted row has the form [stratum, float, float]. Two-field rows get
# the placeholder stratum "<#s>" prepended.
rows = []
for row in reader(sys.stdin):
    total += 1
    if len(row) == 2:
        row = ["<#s>"] + row
    elif len(row) == 3 and total == 1:
        print >> sys.stderr, "stratifying on first field, e.g.:", row[0]
    try:
        row[1] = float(row[1])
        row[2] = float(row[2])
    # Only malformed rows (too few fields, non-numeric text) are skipped;
    # anything else, such as KeyboardInterrupt, must propagate.
    except (IndexError, TypeError, ValueError):
        print >> sys.stderr, "ignoring", "\t".join(map(str, row))
        bad += 1
    else:
        rows.append(row)

# stratify
data = {}
Beispiel #6
0
def load_from_handle(fh):
    """Read every row that ``reader`` yields for *fh* and return ``coerce`` of that list."""
    rows = list(reader(fh))
    return coerce(rows)