예제 #1
0
def evidence_run(args):
    """Annotate motif-predicted binding sites with ChIP summit evidence.

    Reads the regions in ``args.chip_file``, re-centres each one on its
    summit with a window of ``args.peak_ext`` positions, then tags every
    region from ``args.mpbs_file`` with a ``:Y`` or ``:N`` name suffix
    depending on the result of ``include`` against the summit windows. The
    tagged regions are written to
    ``<args.output_location>/<args.output_prefix>.bed``.

    Args:
        args: Object exposing ``chip_file``, ``mpbs_file``, ``peak_ext``,
            ``output_location`` and ``output_prefix`` attributes
            (presumably an argparse namespace -- confirm against caller).
    """
    # Floor division keeps the coordinates integral. With "/" Python 3 would
    # produce floats here, which would then end up in the written BED file.
    # Assumes peak_ext is an int -- TODO confirm against the argument parser.
    half_ext = args.peak_ext // 2

    # Expanding summits
    chip_summit_regions = GenomicRegionSet("TFBS Summit Regions")
    chip_summit_regions.read(args.chip_file)

    for region in chip_summit_regions:
        # The last whitespace-separated token of region.data is added to the
        # region start to obtain the summit position.
        summit = int(region.data.split()[-1]) + region.initial
        # Clamp at zero so the window never starts at a negative coordinate.
        region.initial = max(summit - half_ext, 0)
        region.final = summit + half_ext

    # Calculating intersections
    mpbs_regions = GenomicRegionSet("MPBS Regions")
    mpbs_regions.read(args.mpbs_file)

    chip_summit_regions.sort()
    mpbs_regions.sort()

    tfbs_regions = GenomicRegionSet("TFBS Regions")

    for mpbs_region in mpbs_regions:
        evidence = "Y" if chip_summit_regions.include(mpbs_region) else "N"
        # Keep only the part of the name before the first ":" and append the
        # evidence flag.
        mpbs_region.name = mpbs_region.name.split(":")[0] + ":" + evidence
        tfbs_regions.add(mpbs_region)

    tfbs_regions.sort()

    tfbs_fname = os.path.join(args.output_location,
                              "{}.bed".format(args.output_prefix))
    tfbs_regions.write(tfbs_fname)
예제 #2
0
            if seqrec.id in organisms.values():
                seqs.append(seqrec)
        except:
            continue

    # Build a multiple sequence alignment from the records collected in `seqs`.
    new_alignment = MultipleSeqAlignment(records=seqs)
    #print(len(new_alignment))

    # Write the alignment in FASTA format to "mm9_<rg.name>.fa"; the path is
    # relative, so the file lands in the current working directory.
    AlignIO.write(new_alignment, "mm9_" + rg.name + ".fa", "fasta")

    # Run the PhyloCSF executable on the FASTA file just written and capture
    # its standard output.
    # NOTE(review): the executable path is hard-coded to an absolute location
    # under /home/joseph -- this will not run on other machines as-is.
    process = subprocess.Popen([
        "/home/joseph/Apps/PhyloCSF/PhyloCSF", "29mammals",
        "mm9_" + rg.name + ".fa", "--removeRefGaps", "--strategy=omega",
        "--orf=StopStop3", "--minCodons=25", "--frames=3"
    ],
                               stdout=subprocess.PIPE)
    # Only stdout is piped, so `err` is None here.
    out, err = process.communicate()
    print(out)
    #print(out.split("\t")[2])
    #print(out.split("\t")[3])
    #print(out.split("\t")[4])

    # Replace the first tab-separated field of rg.data with the third
    # tab-separated field of the PhyloCSF output.
    # NOTE(review): no text mode is requested from Popen, so on Python 3 `out`
    # is bytes and out.split("\t") would raise TypeError -- confirm the target
    # Python version. An output with fewer than three fields raises IndexError.
    data = rg.data.split("\t")
    score = out.split("\t")[2]
    rg.data = "\t".join([score] + data[1:])

# Write `bed` to the output path given by args.o.
bed.write(args.o)

# 29/9/2015
# python /projects/reg-gen/tools/phylocsf_check.py -i /projects/ig440396_dendriticcells/exp/RNASeq/expression/isofroms/deseq/new_bed/all_TCONs.bed -o all_TCONS_phyloCSF.bed -organism mm9 -mafdir /data/genome/mm9/multiz30way/maf/