def main():
    """Command-line entry point: build a FASTQ file from a GTF and a genome.

    Expects exactly six command-line arguments, in this order:

        1. path to the GTF annotation
        2. path to the genome sequence file
        3. path to the config file
        4. number of reads (integer)
        5. path of the FASTQ file to write
        6. read type: 'single', 'paired-fr-first' or 'paired-fr-second'

    With any other argument count, ``print_help`` is called (status 0 for
    ``--help`` / ``-h``, status 1 otherwise).

    The read type and the number of reads are validated *before* the
    annotation and genome are loaded, so an invalid invocation fails
    immediately instead of after the expensive parsing steps.

    Raises:
        ValueError: if the number of reads is not a valid integer.

    The function finishes by calling ``sys.exit(0)``.
    """
    timeBegin = time.time()
    if len(sys.argv) != 7:
        if len(sys.argv) > 1 and sys.argv[1] in ("--help", "-h"):
            print_help(0)
        else:
            print_help(1)
    # Example inputs:
    #   pathToGtf = "/reference/homo_sapiens/GRCh38/ensembl/Annotation/Genes/gtf/Homo_sapiens.GRCh38.83.gtf"
    #   pathToSeq = "/reference/homo_sapiens/GRCh38/ensembl/Sequence/WholeGenomeFasta/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
    random.seed(42)  # fixed seed so repeated runs are reproducible
    pathToGtf = sys.argv[1]
    pathToSeq = sys.argv[2]
    pathToConfig = sys.argv[3]
    pathToFastq = sys.argv[5]
    readType = sys.argv[6]

    # Fail fast on bad arguments, before any file is parsed.
    if readType not in ('single', 'paired-fr-first', 'paired-fr-second'):
        exit_with_error("ERROR!!! Incorrect value for {}".format(readType))
    elif readType != 'single':
        ### Paired end reads are not working yet ###
        exit_with_error(
            "ERROR!!! paired-fr-first and paired-fr-second \n"
            "not yet implemented. \n\n"
            "NOTE:: Both reads are tentatively found in the \n"
            "       INSERT class. The second read is not used.\n"
            "       The second read should definitely needs checked.\n")
    numOfReads = int(sys.argv[4])

    gtfList = read_gtf(pathToGtf)
    exonList = get_exon_list(gtfList)
    transList = get_transcript_list(gtfList, exonList)
    geneList = get_gene_list(gtfList, transList)
    chrmList = read_genome(pathToSeq)
    uniqueFeatureList = get_list_of_unique_gtf_features(gtfList)
    get_exon_seq(exonList, chrmList)
    link_exons_trans_and_genes(gtfList, exonList, transList, geneList)
    # Debug aid for link_exons_trans_and_genes():
    #   print_transcripts_with_seqs(transList)

    _geneDict, transDict = create_gene_and_trans_lookup_dict(
        geneList, transList)
    print_gtf_statistics(exonList, transList, geneList)
    # For a complete list, additionally call:
    #   find_trans_that_differ_by_1_exon(geneList, transList)

    # The read count stored in the config is ignored: the command-line
    # value parsed above always takes precedence.
    readLength, desiredTransList, abundanceList, _ = read_config(
        pathToConfig)

    create_fastq_file(pathToFastq, desiredTransList, abundanceList,
                      numOfReads, readLength, transDict, transList,
                      exonList, readType)

    print("Unique features in Gtf : ")
    for feature in uniqueFeatureList:
        print("\t%s" % (feature))
    timeEnd = time.time()
    print("Run time : %s" % (timeEnd - timeBegin))
    sys.exit(0)
# Example #2
0
def cross_validate(args):
    """Run a k-fold cross validation of the KDE for one set of bandwidths.

    The Monte Carlo sample named in the config (``config['IC_MC']['path']``)
    is loaded, cut with ``mc_cut`` and given a ``cur_weight`` field. For each
    fold a KDE is built from the training part via ``create_KDE`` and
    evaluated on the validation part via ``do_validation``. The per-fold
    likelihood values and zero counts are saved with ``np.save`` to
    ``<outfolder>/<key>_<bw>_..._<key>_<bw>.npy``.

    Args:
        args: dict of settings. Keys read here: 'bw_key', 'bw' (same
            length), 'outfolder', 'phi0', 'kfold', 'rs', 'weights'
            ('pl', 'conv' or 'conv+pl'), 'gamma', 'model'.

    Side effects:
        ``args`` is modified in place: 'phi0' is scaled by 1e-18,
        'weights' is set to 'default' and 'no_save' is set to True before
        it is handed to ``create_KDE``. The output folder is created if it
        does not exist. An invalid 'weights' value terminates the process
        with exit status 1.
    """
    assert len(args['bw_key']) == len(args['bw'])
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])
    args['phi0'] *= 1e-18  # correct units
    kf = KFold(n_splits=args['kfold'], random_state=args['rs'], shuffle=True)
    config = read_config()
    print('Load MC: {}'.format(config['IC_MC']['path']))
    mc = np.load(str(config['IC_MC']['path']))[:]
    mc = mc_cut(mc)

    if args['weights'] == 'pl':
        weights = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                       gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc['conv']
    elif args['weights'] == 'conv+pl':
        diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                           gamma=args['gamma'])
        weights = mc['conv'] + diff_weight
        print('Rates [1/yr]:')
        print(np.sum(mc['conv']) * np.pi * 1e7)
        print(np.sum(diff_weight) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        # An invalid argument is a failure: report it with a non-zero status
        # so that calling scripts can detect it.
        sys.exit(1)

    # Store the weights on the sample itself so that every fold can pick
    # them up through the 'default' weights mode of create_KDE.
    mc = append_fields(mc, 'cur_weight', weights)
    args['weights'] = 'default'
    # Intermediate per-fold KDEs are not written to disk.
    args['no_save'] = True

    model, mname = load_model(args['model'])
    bw_dict = dict(zip(args['bw_key'], args['bw']))

    lh_arr, zero_arr = [], []
    for train_index, val_index in kf.split(mc):
        res_dict = create_KDE(args, mc=mc[train_index], bws=bw_dict)
        mc_val = mc[val_index]
        val_settings, grid = model.setup_KDE(mc_val)
        lh, zeros = do_validation(res_dict, val_settings, mc_val['cur_weight'])
        print('Number of zeros {}'.format(zeros))
        print('Likelihood Value {}'.format(lh))
        zero_arr.append(zeros)
        lh_arr.append(lh)

    # File name encodes every (key, bandwidth) pair: key1_bw1_key2_bw2.npy
    fname = '_'.join('{}_{}'.format(key, bw)
                     for key, bw in zip(args['bw_key'], args['bw'])) + '.npy'
    odict = {'zeros': zero_arr, 'lh': lh_arr}
    np.save(os.path.join(args['outfolder'], fname), odict)
# Example #3
0
#
# Initialisation
#

# Dynamically read config to variables: every ``<name>.json`` file in the
# current working directory is parsed and bound to a module-level variable
# called ``<name>`` (the part of the file name before the first dot).
# NOTE(review): the variable name comes straight from the file name, so a
# file such as ``os.json`` would rebind an existing module-level name.
config_files = os.listdir('.')
for c in config_files:
    if c.endswith('.json'):
        # Read-only mode is sufficient, and the context manager guarantees
        # the handle is closed (the files are only read, never written).
        with open(c, "r") as fh:
            f = fh.read()
        v = c.split('.')[0]
        globals()[v] = json.loads(f)
        print('loaded variable name: {}'.format(v))

# Load theme last as it relies on config to be loaded as well
# (``config`` is one of the variables created by the loop above).
theme = read_config('./static/themes/{}/theme.json'.format(
    config['theme']['active_theme']))

# Create temporary path if
# it does not already exist
if os.name == 'nt':
    temp_directory = "{}/thea".format(os.getenv('TEMP'))
else:
    temp_directory = '/tmp/thea'

try:
    os.makedirs(temp_directory)
except OSError as e:
    # An already existing directory is fine; anything else is a real error.
    if e.errno != errno.EEXIST:
        raise

#
# Example #4
0
def create_KDE(args, inds=None, bws=None, mc=None):
    """Build a KDE from Monte Carlo events and evaluate it on a grid.

    Args:
        args: dict of settings. Keys read here: 'model', 'outfolder',
            'weights' ('default', 'pl', 'conv' or 'conv+pl'), 'gamma'
            (for 'pl'), 'adaptive', 'eval_bins', 'save_str', 'no_save',
            and optionally 'mc' (path to the MC file) and 'phi0'.
            The dict is modified in place: missing 'mc' / 'phi0' entries
            are filled with None / 1, a None 'outfolder' is replaced by
            ``<model dir>/out``, and 'phi0' is multiplied by 1e-18.
            NOTE(review): because of that in-place scaling, passing the
            same dict to several calls scales 'phi0' again each time.
        inds: optional index selection applied to the MC sample. Only used
            when the sample is loaded from disk (i.e. ``mc`` is None).
        bws: optional mapping ``variable name -> bandwidth`` overriding the
            bandwidths returned by ``model.setup_KDE``.
        mc: optional, already prepared MC sample. When given, no file is
            loaded and neither ``mc_cut`` nor ``inds`` is applied.

    Returns:
        dict with the keys 'vars' (list of variable names), 'bins' (one
        array of evaluation points per variable), 'coords' (all grid
        points), 'pdf_vals' (KDE values, one axis per variable) and 'bw'
        (the bandwidths used).

    Unless ``args['no_save']`` is true, the result is also pickled to
    ``<outfolder>/<model name><suffix>.pkl``. An invalid 'weights' value
    terminates the process with exit status 1.
    """
    if bws is None:
        bws = {}
    args.setdefault('mc', None)
    args.setdefault('phi0', 1)
    if args['outfolder'] is None:
        args['outfolder'] = os.path.join(os.path.dirname(args['model']), 'out')
    args['phi0'] *= 1e-18  # correct units
    t0 = time.time()
    model, mname = load_model(args['model'])
    print('---- Run KDE with args:')
    print(args)
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])

    print('Load and Update the Monte Carlo')
    config = read_config()
    cfg_keys = config['keys']
    if mc is None:
        if args['mc'] is not None:
            mc_path = args['mc']
        else:
            mc_path = str(config['IC_MC']['path'])
        mc = np.load(str(mc_path))
        mc = mc_cut(mc, config)
        if inds is not None:
            print('Cut on given indices..')
            mc = mc[inds]
    settings, grid = model.setup_KDE(mc, cfg_keys)
    mc_conv = len(mc)
    print('Use {} mc events'.format(mc_conv))
    for key in settings.keys():
        settings[key]['name'] = key
    for key in bws.keys():
        settings[key]['bandwidth'] = bws[key]

    plaw = np.vectorize(powerlaw)

    # create binned pdf
    if args['weights'] == 'default':
        print('Use pre-calculated input weights')
        weights = mc['cur_weight']
    elif args['weights'] == 'pl':
        weights = mc[cfg_keys['ow']] * plaw(
            mc[cfg_keys['trueE']], phi0=args['phi0'], gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc[cfg_keys['conv']]
    elif args['weights'] == 'conv+pl':
        # The second component is read from the MC field named by
        # cfg_keys['astro'] instead of being recomputed from a power law.
        weights = mc[cfg_keys['conv']] + mc[cfg_keys['astro']]
        print('Rates [1/yr]:')
        print(np.sum(mc[cfg_keys['conv']]) * np.pi * 1e7)
        print(np.sum(mc[cfg_keys['astro']]) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        # An invalid argument is a failure: use a non-zero exit status.
        sys.exit(1)

    inp_arr = [settings[key] for key in settings.keys()]
    # A fixed-bandwidth KDE is always built first; in adaptive mode it only
    # serves as the seed pdf for the second, adaptive KDE.
    m_input = meerkat_input(inp_arr, weights, mc_conv=mc_conv)
    m_kde4d = meerkat_kde(m_input)
    if args['adaptive']:
        adtv_input = meerkat_input(inp_arr,
                                   weights,
                                   pdf_seed=m_kde4d.kde,
                                   adaptive=True,
                                   mc_conv=mc_conv)
        m_kde4d = meerkat_kde(adtv_input)

    nbins = args['eval_bins']
    eval_grid = OrderedDict()
    if grid is None:
        grid = {}

    for key in settings.keys():
        if key in grid:
            axis = grid[key]
            if isinstance(axis, (list, tuple)):
                # [low, high] pair: sample it with nbins points.
                eval_grid[key] = np.linspace(axis[0], axis[1], nbins)
            elif isinstance(axis, np.ndarray):
                # Explicit evaluation points: use them unchanged.
                eval_grid[key] = axis
            else:
                # Fail here with a clear message instead of a KeyError
                # further down when the missing axis is looked up.
                raise TypeError(
                    'grid[{!r}] must be a [low, high] list or a numpy '
                    'array, got {}'.format(key, type(axis).__name__))
        else:
            eval_grid[key] = np.linspace(settings[key]['range'][0],
                                         settings[key]['range'][1], nbins)
    print(eval_grid.keys())
    out_bins = [eval_grid[key] for key in settings.keys()]
    coords = np.array(list(itertools.product(*out_bins)))
    bw_used = [settings[key]['bandwidth'] for key in settings.keys()]

    print('Evaluate KDEs:')
    pdf_vals = np.asarray([m_kde4d.eval_point(coord) for coord in coords])
    # Use the real length of every axis: an explicitly given grid array
    # does not have to contain exactly nbins points.
    pdf_vals = pdf_vals.reshape([len(axis) for axis in out_bins])

    if args['weights'] != 'pl':
        add_str = '_' + args['weights']
    else:
        add_str = '_' + 'g_{}'.format(args['gamma'])
    if args['save_str'] != '':
        add_str = add_str + '_' + args['save_str']

    odict = {
        # A plain list, because a dict keys view cannot be pickled on
        # Python 3.
        'vars': list(eval_grid.keys()),
        'bins': out_bins,
        'coords': coords,
        'pdf_vals': pdf_vals,
        'bw': bw_used,
    }

    if not args['no_save']:
        with open(os.path.join(args['outfolder'], mname + add_str + '.pkl'),
                  'wb') as fp:
            pickle.dump(odict, fp)
    t1 = time.time()
    print('Finished after {} minutes'.format((t1 - t0) / 60))
    return odict
    # ----------------------------
    # Parsing input args
    # ----------------------------
    parser = argparse.ArgumentParser(description="Download some HRRR data")
    parser.add_argument(
        "--config",
        "-c",
        type=str,
        default="config.conf",
        help="Name of the config file to be read. Default is 'config.conf'.")
    args = vars(parser.parse_args())

    # ----------------------------
    # Important step: Read the config file.
    # ----------------------------
    config = functions.read_config(args["config"])
    print(config)
    # No parameters?
    if len(config.params) == 0:
        raise Exception("No parameters to download! Check config file.")

    # ----------------------------
    # Load available files
    # ----------------------------
    gribfiles = functions.get_gribfiles_on_server(config)
    if len(gribfiles.get("files")) == 0:
        raise Exception("No files found on server - stop execution.")

    # ----------------------------
    # Create output directory
    # ----------------------------