Example #1
0
# Command-line interface: every option is mandatory and arrives as a string.
# NOTE(review): the description talks about the middle point, but
# line2newLine in this script anchors the window on the record's start --
# confirm which behaviour is intended.
parser = argparse.ArgumentParser(
    description='takes bed as input, get the middle point and extend it to both sides')
for optFlag, optHelp in (
        ('-i', 'input'),
        ('-o', 'output'),
        ('-g', 'genomeFile'),
        ('-w', 'windowSize'),
):
    parser.add_argument(optFlag, required=True, help=optHelp)
args = parser.parse_args()

# Unpack the parsed options into the module-level names used by the rest of
# the script; the window size is the only value converted to an integer.
bedFile, output, genome = args.i, args.o, args.g
windowSize = int(args.w)

# Maps the first tab-separated column of each genome-file line (chromosome
# name) to the integer in its second column (chromosome length).
chrDict = {}

# The context manager closes the genome file deterministically instead of
# leaving the handle open until garbage collection.
with open(genome, 'r') as genomeHandle:
    for line in genomeHandle:
        # Skip blank lines (e.g. a trailing empty line) that would otherwise
        # raise IndexError on ll[1].
        if not line.strip():
            continue
        ll = line.split('\t')
        # int() tolerates the trailing newline when the length is the last column.
        chrDict[ll[0]] = int(ll[1])


def line2newLine(line):
    """Rewrite one BED line as a window that begins at the record's start.

    The window end is ``start + windowSize``, capped at the length stored in
    ``chrDict`` for the record's chromosome.

    Args:
        line: a single line of the input BED file, handed to ``bed.bedline``.

    Returns:
        Whatever ``bedline.newline(start, end)`` produces for the new
        coordinates (presumably the rewritten BED line -- verify against the
        ``bed`` module).

    Raises:
        KeyError: if the record's chromosome is not a key of ``chrDict``.
    """
    record = bed.bedline(line)
    chromosomeLength = chrDict[record.chromosome()]
    windowStart = record.start()
    windowEnd = windowStart + windowSize
    # Never let the window run past the end of the chromosome.
    if windowEnd > chromosomeLength:
        windowEnd = chromosomeLength
    return record.newline(windowStart, windowEnd)


# Script entry point: hand the input path, the output path and the
# line2newLine callback to the project helper. The trailing [] is the list of
# extra callback arguments (none here); presumably the helper calls the
# callback once per input line and writes each result -- verify in generalUtils.
generalUtils.lineBasedFileOperation(bedFile, output, line2newLine, [])
Example #2
0
# Command-line interface: all four options are mandatory strings.
parser = argparse.ArgumentParser(description='fix fragment length if possible')
for optFlag, optHelp in (
        ('-i', '<Required> input'),
        ('-c1', '<Required> start position tab for first file'),
        ('-c2', '<Required> start position tab for second file'),
        ('-o', '<Required> output'),
):
    parser.add_argument(optFlag, required=True, help=optHelp)

args = parser.parse_args()
# NOTE(review): `input` shadows the builtin of the same name; it is kept
# because the driver call at the bottom of this script reads it.
input, output = args.i, args.o
# The column numbers given on the command line are 1-based; shift them to
# 0-based list indices.
s1tab, s2tab = (int(columnNumber) - 1 for columnNumber in (args.c1, args.c2))


def bedClosest2distance(line, s1tab, s2tab):
    """Return the signed distance between two interval centres on one line.

    Args:
        line: a tab-separated line holding two intervals.
        s1tab: 0-based index of the first interval's start column; its end is
            read from the next column.
        s2tab: 0-based index of the second interval's start column; its end
            is read from the next column.

    Returns:
        ``str(centre2 - centre1)``, where each centre is
        ``generalUtils.mean([start, end])`` (presumably the arithmetic mean
        -- verify in generalUtils).
    """
    columns = line.split('\t')
    # Parse all four coordinates up front, in the same order as the columns
    # are referenced: start1, end1, start2, end2.
    firstStart, firstEnd, secondStart, secondEnd = (
        int(columns[index])
        for index in (s1tab, s1tab + 1, s2tab, s2tab + 1)
    )
    firstCentre = generalUtils.mean([firstStart, firstEnd])
    secondCentre = generalUtils.mean([secondStart, secondEnd])
    return str(secondCentre - firstCentre)


# Script entry point: hand the input path, the output path and the
# bedClosest2distance callback to the project helper. [s1tab, s2tab] lines up
# with the callback's two parameters after `line`; presumably the helper
# forwards them on every call -- verify in generalUtils.
generalUtils.lineBasedFileOperation(input, output, bedClosest2distance,
                                    [s1tab, s2tab])
# Remaining command-line options of this script (the parser object and the
# -i option are created before this point).
parser.add_argument('-o', required=True, help='output')
parser.add_argument('-w', required=True, help='windowSize')
# -g is optional; its default of False doubles as the "no genome file" flag
# tested later via `if args.g`.
parser.add_argument('-g', required=False, default=False, help='genomeFile')
# NOTE(review): nothing in the visible part of this script reads
# args.randomMid -- confirm whether it is meant to reach getInterval.
parser.add_argument(
    '--randomMid',
    required=False,
    action='store_true',
    help='for cases of .5 middle point, randomly select between positions 0 or 1',
)
args = parser.parse_args()

bedFile, output = args.i, args.o
windowSize = int(args.w)

# Only when a genome file was supplied: build a lookup from the first
# tab-separated column of each line (chromosome name) to the integer in the
# second column (chromosome length).
if args.g:
    chromosomeSizes = {}
    # The context manager closes the genome file deterministically instead of
    # leaving the handle open until garbage collection.
    with open(args.g, 'r') as genomeHandle:
        for line in genomeHandle:
            # Skip blank lines (e.g. a trailing empty line) that would
            # otherwise raise IndexError on ll[1].
            if not line.strip():
                continue
            ll = line.split('\t')
            chromosomeSizes[ll[0]] = int(ll[1])

def getInterval(line, randomness=False):
    """Build a window of ``windowSize`` on each side of a record's midpoint.

    Args:
        line: a single line of the input BED file, handed to ``bed.bedline``.
        randomness: accepted but not used by this function.

    Returns:
        ``bedline.newline(start, end)`` for the window
        ``[midpoint - windowSize, midpoint + windowSize]``. When a genome
        file was given (``args.g``), ``False`` is returned instead if the
        window is not strictly inside the chromosome.

    Raises:
        KeyError: if a genome file was given and the record's chromosome is
            not a key of ``chromosomeSizes``.
    """
    record = bed.bedline(line)
    centre = record.midpoint()
    windowStart, windowEnd = centre - windowSize, centre + windowSize

    # Without a genome file there is nothing to validate against.
    if not args.g:
        return record.newline(windowStart, windowEnd)

    chromosomeLength = chromosomeSizes[record.chromosome()]
    # NOTE(review): both bounds are strict, so a window starting at 0 or
    # ending exactly at the chromosome length is rejected -- confirm this is
    # intended.
    if windowStart <= 0 or windowEnd >= chromosomeLength:
        return False
    return record.newline(windowStart, windowEnd)
    

# Script entry point: hand the input path, the output path and the
# getInterval callback to the project helper. The empty list means no extra
# callback arguments, so getInterval's `randomness` keeps its default.
# getInterval may return False; presumably the helper skips such lines --
# verify in generalUtils.
generalUtils.lineBasedFileOperation(bedFile, output, getInterval, [])
Example #4
0
#!/usr/bin/env python
import generalUtils
import argparse
import gff

# Command-line interface: input path, output path and the field to extract,
# all mandatory.
parser = argparse.ArgumentParser(
    description='prints a meta field from gff by order')
for optFlag, optHelp in (
        ('-i', '<Required> input'),
        ('-o', '<Required> output'),
        ('-f', '<Required> field of interest'),
):
    parser.add_argument(optFlag, required=True, help=optHelp)
args = parser.parse_args()

# Hand the paths and the gff callback to the project helper; the requested
# field name is passed as the callback's single extra argument (presumably
# forwarded on every call -- verify in generalUtils).
generalUtils.lineBasedFileOperation(
    args.i, args.o, gff.getGeneInformationFromGFFline, [args.f])
Example #5
0
import sys
import argparse
import generalUtils
import fasta
from sequence import DNA

# Command-line interface: input path, strand label and output path, all
# mandatory.
parser = argparse.ArgumentParser(
    description='convert cufflinks output to bed file with counts')
for optFlag, optHelp in (
        ('-i', '<Required> input'),
        ('-s', '<Required> strand'),
        ('-o', '<Required> output'),
):
    parser.add_argument(optFlag, required=True, help=optHelp)

args = parser.parse_args()


def cuffLinksLine2bedLine(line, strand):
    """Convert one tab-separated cufflinks line into a 5-column BED-like line.

    The locus is read from the 7th column in the form ``chrom:start-end`` and
    the FPKM value from the 10th column.

    Args:
        line: one tab-separated line of cufflinks output.
        strand: strand label written verbatim into the 4th output column.

    Returns:
        ``chrom<TAB>start<TAB>end<TAB>strand<TAB>FPKM`` with no trailing
        newline; coordinates and FPKM are copied as text, not converted.

    Raises:
        IndexError: if the line has fewer than 10 columns or the locus is not
            of the form ``chrom:start-end``.
    """
    fields = line.split('\t')
    # Split on the LAST colon only, so contig names that themselves contain
    # ':' (e.g. HLA alt contigs such as "HLA-A*01:01:01:01") stay intact.
    locusParts = fields[6].rsplit(':', 1)
    chromosome = locusParts[0]
    bounds = locusParts[1].split('-')
    return '\t'.join([chromosome, bounds[0], bounds[1], strand, fields[9]])


# Script entry point: hand the input path, the output path and the
# cuffLinksLine2bedLine callback to the project helper. [args.s] lines up
# with the callback's `strand` parameter; presumably the helper forwards it
# on every call -- verify in generalUtils.
generalUtils.lineBasedFileOperation(args.i, args.o, cuffLinksLine2bedLine,
                                    [args.s])