Ejemplo n.º 1
0
def main():
    """Parse the command line and run the analysis pipeline.

    Only called if run not interactively.

    Unless ``snv/SNV.txt`` already exists, this runs ``dec.main`` followed by
    the fas2reads, contain, mm and freqEst steps; the process terminates with
    a non-zero exit status as soon as one of them reports a non-zero return
    code.  SNV calling is then run in every case (a failure there is only
    logged) and all ``./SNV*`` files are moved into the ``snv`` directory.
    """
    import optparse
    import shutil
    import glob
    import dec

    def exit_on_failure(retcode, step_name):
        """Log and abort with a failing exit status if ``retcode`` is not 0."""
        # Compare by value: ``is not 0`` tests object identity, which is an
        # implementation detail for ints (and a SyntaxWarning on Python 3.8+).
        if retcode != 0:
            message = '%s did not return 0' % step_name
            sholog.error(message)
            # A string argument is printed to stderr and yields exit status 1;
            # a bare sys.exit() would report success to the calling shell.
            sys.exit(message)

    # parse command line
    optparser = optparse.OptionParser()

    optparser.add_option("-b", "--bam", help="sorted bam format alignment file.",
                         default="", type="string", dest="b")
    optparser.add_option("-f", "--fasta", help="reference genome in fasta format.",
                         default="", type="string", dest="f")
    optparser.add_option("-a", "--alpha", help="alpha in dpm sampling <0.01>", default=0.01,
                         type="float", dest="a")
    optparser.add_option("-w", "--windowsize", help="window size <201>", default=201,
                         type="int", dest="w")
    # window shiftings, such that each base is covered up to win_shifts times
    optparser.add_option("-s", "--winshifts", help="number of window shifts <3>", default=3,
                         type="int", dest="s")
    optparser.add_option("-i", "--sigma", help="value of sigma to use when calling SNVs", default=0.01,
                         type="float", dest="i")
    optparser.add_option("-x", "--maxcov", help="approximate maximum coverage allowed", default=10000,
                         type="int", dest="x")
    optparser.add_option("-r", "--region", help="region in format 'chr:start-stop', eg 'ch3:1000-3000'", default='',
                         type="string", dest="r")
    optparser.add_option("-k", "--keep_files", help="keep intermediate files (Gibbs sampling)", default=False,
                         action="store_true", dest="k")
    (options, args) = optparser.parse_args()

    sholog.info(' '.join(sys.argv))
    in_bam = options.b
    in_fasta = options.f
    keep_all_files = options.k
    step = options.w
    win_shifts = options.s
    max_coverage = options.x
    sigma = options.i
    alpha = options.a
    region = options.r

    # file name of the alignment up to its first dot, without directories
    in_stem = os.path.split(in_bam)[1].split('.')[0]

    # An existing snv/SNV.txt means the steps below are skipped.
    if not os.path.exists('snv/SNV.txt'):
        # run dec.py
        sholog.debug('running dec.py')
        dec.main(in_bam, in_fasta, step, win_shifts, max_coverage, sigma,
                 region, keep_all_files, alpha)

        # rename the corrected reads file and run fas2reads on it
        shutil.move('%s.cor.fas' % in_stem, '%s_cor.fas' % in_stem)
        sholog.debug('running fas2reads')
        exit_on_failure(run_f2r('%s_cor.fas' % in_stem), 'fas2reads')

        # run contain
        exit_on_failure(run_contain('%s_cor' % in_stem), 'contain')

        # run mm.py (this might become a module)
        exit_on_failure(run_mm('%s_cor' % in_stem), 'mm.py')

        # run EM freqEst
        exit_on_failure(run_freqEst('%s_cor' % in_stem), 'freqEst')

    # run snv.py
    sholog.debug('running snv.py')
    # Both options are parsed as ints; floor division keeps the increment an
    # int on Python 3 and is identical to ``/`` for ints on Python 2.
    retcode = run_snv(in_fasta, in_bam, sigma, step // win_shifts)
    if retcode != 0:
        # deliberately not fatal: the SNV files are still tidied below
        sholog.error('snv calling failed')

    # tidy snvs
    try:
        os.mkdir('snv')
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir('snv'):
            raise
    for snv_file in glob.glob('./SNV*'):
        shutil.move(snv_file, 'snv/')
Ejemplo n.º 2
0
def main():
    """Parse the command line and run the read-correction pipeline.

    Only called if run not interactively.

    The reads file must be named ``filestem.format``.  Unless the format is
    ``far``, ``s2f.main`` is run first to produce ``<filestem>.far`` in the
    current working directory.  ``dec.main`` then runs on the ``.far`` file,
    followed by the fas2reads, contain, mm and freqEst steps.  The process
    terminates with a non-zero exit status as soon as the file name is
    malformed or one of those steps reports a non-zero return code.
    """
    import optparse
    import shutil
    import dec
    import s2f

    def exit_on_failure(retcode, step_name):
        """Log and abort with a failing exit status if ``retcode`` is not 0."""
        # Compare by value: ``is not 0`` tests object identity, which is an
        # implementation detail for ints (and a SyntaxWarning on Python 3.8+).
        if retcode != 0:
            message = '%s did not return 0' % step_name
            sholog.error(message)
            # A string argument is printed to stderr and yields exit status 1;
            # a bare sys.exit() would report success to the calling shell.
            sys.exit(message)

    # parse command line
    optparser = optparse.OptionParser()

    optparser.add_option("-f",
                         "--readsfile",
                         help="file with reads <.fas or .far format>",
                         default="",
                         type="string",
                         dest="f")
    optparser.add_option("-j",
                         "--iterations",
                         help="iterations in dpm sampling <1000>",
                         default=1000,
                         type="int",
                         dest="j")
    optparser.add_option("-a",
                         "--alpha",
                         help="alpha in dpm sampling <0.01>",
                         default=0.01,
                         type="float",
                         dest="a")
    optparser.add_option("-w",
                         "--windowsize",
                         help="window size in <201>",
                         default=201,
                         type="int",
                         dest="w")
    optparser.add_option(
        "-t",
        "--threshold",
        help="if similarity is less, throw reads away... <default=0.7>",
        type="float",
        dest="threshold",
        default=0.7)
    optparser.add_option("-n",
                         "--no_pad_insert",
                         help="do not insert padding gaps <default=insert>",
                         action="store_false",
                         default=True,
                         dest="pad")
    optparser.add_option("-r", "--ref", type="string", default="", dest="ref")
    optparser.add_option(
        "-o",
        "--output",
        help="output suffix for alignment: must be '.far' or none for stdout",
        type="string",
        dest="o")
    optparser.add_option(
        "-s",
        "--winshifts",
        help="number of window shifts <3>",
        default=3,
        type="int",
        dest="s"
    )  # window shiftings, such that each base is covered up to win_shifts times
    optparser.add_option(
        "-k",
        "--keep_files",
        help="keep intermediate files (Gibbs sampling) <default=False>",
        default=False,
        action="store_true",
        dest="k")

    (options, args) = optparser.parse_args()

    sholog.info(' '.join(sys.argv))
    in_file = options.f
    keep_all_files = options.k
    step = options.w
    win_shifts = options.s
    iters = options.j
    alpha = options.a

    # The stem is the part of the file name before the first dot, the format
    # the part between the first and the second dot.
    name_parts = os.path.split(in_file)[1].split('.')
    if len(name_parts) < 2:
        # single-argument print() behaves the same on Python 2 and 3
        print('The input file must be filestem.format')
        sholog.error('The input file must be filestem.format')
        sys.exit(1)
    in_stem, in_format = name_parts[0], name_parts[1]

    dec_input = in_file
    if in_format != 'far':
        # convert the reads with s2f.py first; dec.py then works on its output
        ref_file = options.ref
        out_file = os.path.join(os.getcwd(), in_stem + '.far')
        thresh = options.threshold
        pad_insert = options.pad
        sholog.debug('running s2f.py')
        s2f.main(ref_file, in_file, out_file, thresh, pad_insert,
                 keep_all_files)
        dec_input = out_file

    # run dec.py
    sholog.debug('running dec.py')
    dec.main(dec_input, step, win_shifts, keep_all_files, iters, alpha)

    # rename the corrected reads file and run fas2reads on it
    shutil.move('%s.cor.fas' % in_stem, '%s_cor.fas' % in_stem)
    sholog.debug('running fas2reads')
    exit_on_failure(run_f2r('%s_cor.fas' % in_stem), 'fas2reads')

    # run contain
    exit_on_failure(run_contain('%s_cor' % in_stem), 'contain')

    # run mm.py (this might become a module)
    exit_on_failure(run_mm('%s_cor' % in_stem), 'mm.py')

    # run EM freqEst
    exit_on_failure(run_freqEst('%s_cor' % in_stem), 'freqEst')
Ejemplo n.º 3
0
    (options, args) = optparser.parse_args()
    del(args)
    sholog.info(' '.join(sys.argv))

    in_stem = '.'.join(os.path.split(options.b)[1].split('.')[:-1])

    if options.p:
        # amplicon_mode, run only diri_sampler and snv.py
        amplian.main(in_bam=options.b, in_fasta=options.f,
                     max_coverage=options.x, alpha=options.a)
    if not os.path.exists('snv/SNV.txt'):
        # 1. run dec.py
        sholog.debug('running dec.py')
        dec.main(in_bam=options.b, in_fasta=options.f, win_length=options.w,
                 win_shifts=options.s, max_coverage=options.x,
                 region=options.r, keep_files=options.k, alpha=options.a)

        # 2. copy file and run fas2read to convert from fasta
        shutil.move('%s.cor.fas' % in_stem, '%s_cor.fas' % in_stem)
        sholog.debug('running fas2reads')
        my_prog = 'perl -I %s %s' % (dn + '/perllib',
                                     os.path.join(dn, 'fas2read.pl'))
        my_arg = " -f %s_cor.fas" % in_stem
        assert os.path.isfile("%s_cor.fas" % in_stem), \
            'File %s_cor.fas not found' % in_stem
        retcode_f2r = run_child(my_prog, my_arg)
        if retcode_f2r:
            sholog.error('fas2read did not return 0')
            sys.exit('Something went wrong in fas2read')
        else:
Ejemplo n.º 4
0
    in_stem = '.'.join(os.path.split(options.b)[1].split('.')[:-1])

    if options.p:
        # amplicon_mode, run only diri_sampler and snv.py
        amplian.main(in_bam=options.b,
                     in_fasta=options.f,
                     max_coverage=options.x,
                     alpha=options.a)
    if not os.path.exists('snv/SNV.txt'):
        # 1. run dec.py
        sholog.debug('running dec.py')
        dec.main(in_bam=options.b,
                 in_fasta=options.f,
                 win_length=options.w,
                 win_shifts=options.s,
                 max_coverage=options.x,
                 region=options.r,
                 keep_files=options.k,
                 alpha=options.a)

        # 2. copy file and run fas2read to convert from fasta
        shutil.move('%s.cor.fas' % in_stem, '%s_cor.fas' % in_stem)
        sholog.debug('running fas2reads')
        my_prog = 'perl -I %s %s' % (dn + '/perllib',
                                     os.path.join(dn, 'fas2read.pl'))
        my_arg = " -f %s_cor.fas" % in_stem
        assert os.path.isfile("%s_cor.fas" % in_stem), \
            'File %s_cor.fas not found' % in_stem
        retcode_f2r = run_child(my_prog, my_arg)
        if retcode_f2r:
            sholog.error('fas2read did not return 0')
Ejemplo n.º 5
0
def install_dependencies():
    """Install the pillow package through both ``pip`` and ``pip3``.

    Each installer is invoked with ``os.system``; its exit status is ignored.
    """
    for command in ('pip install pillow', 'pip3 install pillow'):
        os.system(command)


def raise_err():
    """Print a notice that the first command-line argument is not recognised.

    Despite its name this function raises nothing itself: it only prints the
    offending argument (``sys.argv[1]``) together with a hint on how to show
    the usage text of the running script (``sys.argv[0]``).
    """
    unknown_argument = sys.argv[1]
    script_name = sys.argv[0]
    template = '''
    Unknown argument: "{}"

    Type "python {} -h" to see the usage.
    '''
    print(template.format(unknown_argument, script_name))


if __name__ == '__main__':
    # Dispatch on the argument list:
    #   no argument or a lone "-h"       -> show the help layout
    #   a lone "-i"                      -> install the dependencies
    #   a mode listed in rds plus a path -> run enc (first mode) or dec
    #   anything else                    -> report the unknown argument
    arg_count = len(args)
    if arg_count == 1 or (arg_count == 2 and args[1] == '-h'):
        layout_help()
    elif arg_count == 2 and args[1] == '-i':
        install_dependencies()
    elif arg_count == 3 and args[1] in rds:
        # Both modules are imported whichever mode was requested.
        import enc
        import dec
        selected_module = enc if args[1] == rds[0] else dec
        selected_module.main(args[2])
    else:
        raise_err()
Ejemplo n.º 6
0
def main():
    """Build per-feature radiation columns and write them to a CSV file.

    Reads the 2013 measurements CSV, localises its index to Europe/Rome and,
    for every configured model, passes the frame through ``dec.main``.  For
    each timestamp whose ``zenith`` value is below 85 the matching beam and
    diffuse rasters are combined with ``glob_real`` and sampled at the
    shapefile features with ``point_value``; the values become one column of
    the result.  The result is finally transposed, joined with the ``T_ex``
    column and written to ``primo_rad.csv``.

    All input and output paths are hard-coded.
    """
    start_time = time.time()
    latitude = 45.0619883  # latitude  N
    longitude = 7.6602814  # longitude E
    elevation = 250  # meters above the sea

    df = pd.read_csv('/home/bottaccioli/per_sara/2013.csv',
                     sep=";",
                     index_col='Date Time',
                     decimal=',')
    df.index = pd.to_datetime(df.index)
    df.index = df.index.tz_localize('Europe/Rome', ambiguous='infer')

    # Lookup tables for the raster file names.  They do not depend on the
    # model, so they are built once.
    # NOTE(review): the month codes look like the day-of-year of one
    # representative day per month - confirm against the raster generation.
    month = {
        1: '17',
        2: '47',
        3: '75',
        4: '105',
        5: '135',
        6: '162',
        7: '198',
        8: '228',
        9: '258',
        10: '288',
        11: '318',
        12: '344'
    }
    # Only hours 4..22 have an entry; any other hour raises KeyError.
    hour = {h: '%02d' % h for h in range(4, 23)}
    # Quarter hours written as hundredths of an hour (15 min -> '25').
    minute = {0: '00', 15: '25', 30: '50', 45: '75'}
    directory = '/home/bottaccioli/raster_crocetta/'

    def raster_path(kind, stamp):
        """Return the path of the ``kind`` ('beam'/'diff') raster for ``stamp``."""
        return (directory + kind + '_' + month[stamp.month] + '_' +
                hour[stamp.hour] + '.' + minute[stamp.minute])

    # Timestamps before this date get a different time shift below.
    # NOTE(review): the reason for this particular date is not visible here.
    cutoff = pd.to_datetime('27/03/2013').tz_localize('Europe/Rome')

    models = ['Karatasou']
    #models = ['Engerer2','Skartevit2','Skartevit1','Erbs','Reindl']
    #models=['Karatasou','Ruiz','Skartevit1','Engerer2','Erbs','Reindl']
    for model in models:
        print(model)
        df = dec.main(latitude, longitude, elevation, model, df)
        geodf = gpd.read_file("/home/bottaccioli/per_sara/primo/primo.shp",
                              crs='4326')
        geodf = geodf.set_index(['FID'])
        # Start from the feature table without its geometry; one column per
        # processed timestamp is appended in the loop below.
        radiation = geodf.copy()
        radiation = radiation.drop(['geometry'], axis=1)

        total = len(df)
        for i, dfix in enumerate(df.index, 1):
            # Single-argument print() gives the same output on Python 2 and 3.
            print('%s %sof%s' % (model, i, total))

            # .loc replaces the label lookup formerly done through .ix,
            # which was removed in pandas 1.0.
            if df.loc[dfix, 'zenith'] < 85:
                in_dst = dfix.dst().seconds != 0
                before_cutoff = dfix < cutoff

                # While DST is in effect the lookup timestamp is shifted back.
                if in_dst:
                    shift_hours = 1.25 if before_cutoff else 1.00
                    index = dfix - pd.DateOffset(hours=shift_hours)
                else:
                    index = dfix

                beam = raster_path('beam', index)
                diff = raster_path('diff', index)
                global_raster, gt = glob_real(beam, diff,
                                              df.loc[index, 'k_b'],
                                              df.loc[index, 'k_d'])
                stats = point_value(geodf, global_raster, gt)

                # Choose the label of the column that receives the values.
                if in_dst:
                    column = index
                elif before_cutoff:
                    column = index - pd.DateOffset(hours=0.25)
                else:
                    print('fava')
                    column = index
                # list() keeps the assignment working when values() is a
                # view (Python 3) rather than a list (Python 2).
                radiation[column] = list(stats.values())

                # drop the reference to the raster before the next iteration
                global_raster = None
        radiation = radiation.transpose()
        print("--- %s seconds ---" % (time.time() - start_time))
        radiation = radiation.join(df['T_ex'])
        radiation.to_csv('primo_rad.csv')