Esempio n. 1
0
# Filter thresholds taken from the command line:
#   argv[6] -> minimum gene length (integer)
#   argv[7] -> minimum RPKM (float)
min_gene_len, min_rpkm = int(argv[6]), float(argv[7])
# argv[8] is the minimum fold difference between the highest and lowest value
# for a gene to be considered (filter). It is kept as a logarithm with base 2
# (assuming `log` is math.log -- confirm against the file's imports).
min_diff = log(float(argv[8]), 2)

# Write the two header rows of the cdt file.
# The first line of the rpkm file is its header; its first two columns are
# dropped and the remaining ones become the column labels.
labels = rpkm_file.readline().rstrip('\n').split('\t')[2:]
# Row 1: the three fixed column titles followed by one title per label.
my_str = "\t".join(["UID", "NAME", "GWEIGHT"] + labels)
# Row 2: "EWEIGHT", two empty cells, then a weight of 1 for every label.
my_str2 = "EWEIGHT\t\t" + "\t1" * len(labels)
# No newline is written after the second row here.
cdt_file.write(my_str + '\n' + my_str2)

# Tally of rows read and of rows dropped by each filter.
counts = E.Counter()

for line in rpkm_file:
    # Strip only the line terminator. Slicing with [:-1] would cut a real
    # character off the last row when the file lacks a trailing newline.
    data = line.rstrip("\n").split("\t")
    counts.input += 1
    if int(data[1]) < min_gene_len:  # exclude genes that are too short
        counts.skipped_length += 1
        continue
    name = data[0]
    # Build a real list: on Python 3 map() returns a one-shot iterator, which
    # max() below would exhaust, leaving no values for any later use of `la`.
    la = [float(value) for value in data[2:]]

    if max(la) < min_rpkm:  # exclude genes whose highest value is too low
        counts.skipped_rpkm += 1
        continue

    # Genes that pass both filters are listed, one name per line.
    background_file.write(name + "\n")