def main():
    """Command-line entry point: build a FASTQ file from a GTF and a genome.

    Expects exactly six positional arguments in ``sys.argv``:

    1. path to the GTF file (passed to ``read_gtf``)
    2. path to the sequence file (passed to ``read_genome``)
    3. path to the config file (passed to ``read_config``)
    4. number of reads; overrides the value returned by ``read_config``
    5. path of the FASTQ file handed to ``create_fastq_file``
    6. read type: ``single``, ``paired-fr-first`` or ``paired-fr-second``

    With any other argument count ``print_help`` is called (status 0 for
    ``--help``/``-h``, status 1 otherwise).  The paired read types are
    recognised but rejected because they are not implemented yet.

    The cheap argument checks run before the annotation and genome are
    loaded, so an invalid invocation fails immediately.  On success the
    function prints the unique GTF features and the run time, then calls
    ``sys.exit(0)``.

    Raises:
        ValueError: if the fourth argument is not an integer.
    """
    timeBegin = time.time()
    if len(sys.argv) != 7:
        # NOTE(review): presumably print_help() terminates the process with
        # the given status -- confirm.
        if len(sys.argv) > 1 and sys.argv[1] in ("--help", "-h"):
            print_help(0)
        else:
            print_help(1)

    #pathToGtf = "/reference/homo_sapiens/GRCh38/ensembl/Annotation/Genes/gtf/Homo_sapiens.GRCh38.83.gtf"
    #pathToSeq = "/reference/homo_sapiens/GRCh38/ensembl/Sequence/WholeGenomeFasta/Homo_sapiens.GRCh38.dna.primary_assembly.fa"
    random.seed(42)  # fixed seed
    pathToGtf = sys.argv[1]
    pathToSeq = sys.argv[2]
    pathToConfig = sys.argv[3]
    numOfReads = int(sys.argv[4])
    pathToFastq = sys.argv[5]
    readType = sys.argv[6]

    # Validate the read type up front, before any expensive file parsing.
    if readType not in ('single', 'paired-fr-first', 'paired-fr-second'):
        exit_with_error("ERROR!!! Incorrect value for {}".format(readType))
    elif readType != 'single':
        ### Paired end reads are not working yet ###
        exit_with_error(
            "ERROR!!! paired-fr-first and paired-fr-second \n"
            "not yet implemented. \n\n"
            "NOTE:: Both reads are tentatively found in the \n"
            " INSERT class. The second read is not used.\n"
            " The second read should definitely needs checked.\n")

    gtfList = read_gtf(pathToGtf)
    exonList = get_exon_list(gtfList)
    transList = get_transcript_list(gtfList, exonList)
    geneList = get_gene_list(gtfList, transList)
    chrmList = read_genome(pathToSeq)
    uniqueFeatureList = get_list_of_unique_gtf_features(gtfList)
    get_exon_seq(exonList, chrmList)
    link_exons_trans_and_genes(gtfList, exonList, transList, geneList)
    # print_transcripts_with_seqs(transList)  # Debug link_exons_trans_and_genes()
    # Only the transcript lookup is needed below; the gene lookup is unused.
    _geneDict, transDict = create_gene_and_trans_lookup_dict(
        geneList, transList)
    print_gtf_statistics(exonList, transList, geneList)
    # find_trans_that_differ_by_1_exon(geneList, transList)  # Uncomment for complete list

    # The read count stored in the config is superseded by the command-line
    # value parsed above, so the fourth return value is discarded.
    readLength, desiredTransList, abundanceList, _ = read_config(pathToConfig)

    create_fastq_file(pathToFastq, desiredTransList, abundanceList,
                      numOfReads, readLength, transDict, transList, exonList,
                      readType)

    print("Unique features in Gtf : ")
    for feature in uniqueFeatureList:
        print("\t%s" % (feature))
    timeEnd = time.time()
    print("Run time : %s" % (timeEnd - timeBegin))
    sys.exit(0)
def cross_validate(args):
    """Run a k-fold cross validation for one set of KDE bandwidths.

    The Monte Carlo sample named in the config is loaded, cut with
    ``mc_cut`` and given a ``cur_weight`` field.  For every fold a KDE is
    built on the training part with ``create_KDE`` and scored on the
    validation part with ``do_validation``.  The per-fold likelihood values
    and zero counts are saved with ``np.save`` into ``args['outfolder']``
    under a file name assembled from the bandwidth keys and values.

    Args:
        args: mapping with the keys ``bw_key``, ``bw``, ``outfolder``,
            ``phi0``, ``kfold``, ``rs``, ``weights`` (``'pl'``, ``'conv'``
            or ``'conv+pl'``), ``gamma`` (read for the power-law weights)
            and ``model``.  The mapping is not modified; the function
            works on a shallow copy so that the same dict can be reused
            for further calls.

    Raises:
        AssertionError: if ``bw_key`` and ``bw`` differ in length.
        SystemExit: with status 1 if ``args['weights']`` is not one of the
            supported values.
    """
    assert len(args['bw_key']) == len(args['bw']), \
        'bw_key and bw must have the same length'

    # Shallow copy: the settings changed below (phi0, weights, no_save) must
    # not leak into the caller's dict, otherwise a second call would see
    # weights == 'default' and an already rescaled phi0.
    args = dict(args)

    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])
    args['phi0'] = args['phi0'] * 1e-18  # correct units

    kf = KFold(n_splits=args['kfold'], random_state=args['rs'], shuffle=True)
    config = read_config()
    print('Load MC: {}'.format(config['IC_MC']['path']))
    mc = np.load(str(config['IC_MC']['path']))[:]
    mc = mc_cut(mc)

    if args['weights'] == 'pl':
        weights = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                       gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc['conv']
    elif args['weights'] == 'conv+pl':
        diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
                                           gamma=args['gamma'])
        weights = mc['conv'] + diff_weight
        # pi * 1e7 is used as the conversion factor for the printed
        # per-year rates.
        print('Rates [1/yr]:')
        print(np.sum(mc['conv']) * np.pi * 1e7)
        print(np.sum(diff_weight) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        sys.exit(1)  # invalid input is an error, so exit non-zero

    # Store the weights in the sample itself so that create_KDE can pick
    # them up through its 'default' weights mode.
    mc = append_fields(mc, 'cur_weight', weights)
    args['weights'] = 'default'
    args['no_save'] = True  # the per-fold KDEs are not written out

    model, _ = load_model(args['model'])
    bw_dict = dict(zip(args['bw_key'], args['bw']))

    lh_arr, zero_arr = [], []
    for train_index, val_index in kf.split(mc):
        res_dict = create_KDE(args, mc=mc[train_index], bws=bw_dict)
        mc_val = mc[val_index]
        val_settings, _ = model.setup_KDE(mc_val)
        lh, zeros = do_validation(res_dict, val_settings,
                                  mc_val['cur_weight'])
        print('Number of zeros {}'.format(zeros))
        print('Likelihood Value {}'.format(lh))
        zero_arr.append(zeros)
        lh_arr.append(lh)

    # File name pattern: <key1>_<bw1>_<key2>_<bw2>....npy
    fname = '_'.join('{}_{}'.format(key, bw)
                     for key, bw in zip(args['bw_key'], args['bw'])) + '.npy'
    odict = {'zeros': zero_arr, 'lh': lh_arr}
    np.save(os.path.join(args['outfolder'], fname), odict)
#
# Initialisation
#

# Dynamically read config to variables: every '*.json' file in the current
# working directory is parsed and bound to a module-level global named after
# the part of the file name before the first dot.
config_files = os.listdir('.')
for c in config_files:
    if c.endswith('.json'):
        # Read-only access is sufficient; the context manager closes the
        # handle instead of leaving it open.
        with open(c, "r") as fh:
            f = fh.read()
        v = c.split('.')[0]
        globals()[v] = json.loads(f)
        print('loaded variable name: {}'.format(v))

# Load theme last as it relies on config to be loaded as well
theme = read_config('./static/themes/{}/theme.json'.format(
    config['theme']['active_theme']))

# Create temporary path if
# it does not already exist
if os.name == 'nt':
    temp_directory = "{}/thea".format(os.getenv('TEMP'))
else:
    temp_directory = '/tmp/thea'

try:
    os.makedirs(temp_directory)
except OSError as e:
    # An already existing directory is fine; anything else is a real error.
    if e.errno != errno.EEXIST:
        raise

#
def create_KDE(args, inds=None, bws=None, mc=None):
    """Build a KDE for the configured model and evaluate it on a grid.

    Args:
        args: dict of run options.  Keys read here: ``model``,
            ``outfolder`` (``None`` selects ``<model dir>/out``),
            ``weights`` (``'default'``, ``'pl'``, ``'conv'`` or
            ``'conv+pl'``), ``gamma`` (for ``'pl'``), ``adaptive``,
            ``eval_bins``, ``save_str``, ``no_save`` and optionally ``mc``
            and ``phi0``.  Missing ``mc``/``phi0`` entries and a ``None``
            ``outfolder`` are filled in with their defaults.
        inds: optional index selection applied to the Monte Carlo sample.
        bws: optional mapping ``variable name -> bandwidth`` overriding the
            bandwidths returned by ``model.setup_KDE``.
        mc: optional, already prepared Monte Carlo sample.  When omitted the
            sample is loaded from ``args['mc']`` or from the config.

    Returns:
        dict with the keys ``vars`` (list of variable names), ``bins``
        (evaluation points per variable), ``coords`` (all grid points),
        ``pdf_vals`` (KDE values, one axis per variable) and ``bw`` (the
        bandwidths used).  Unless ``args['no_save']`` is true the same dict
        is also pickled into ``args['outfolder']``.

    Raises:
        SystemExit: with status 1 for an unknown ``args['weights']`` value.
        TypeError: if a grid entry is neither a list/tuple nor an ndarray.
    """
    if bws is None:
        bws = {}
    if 'mc' not in args:
        args['mc'] = None
    if 'phi0' not in args:
        args['phi0'] = 1
    if args['outfolder'] is None:
        args['outfolder'] = os.path.join(os.path.dirname(args['model']),
                                         'out')
    # Scale into a local instead of in place: rescaling args['phi0'] itself
    # would compound on every call that reuses the same dict.
    phi0 = args['phi0'] * 1e-18  # correct units

    t0 = time.time()
    model, mname = load_model(args['model'])
    print('---- Run KDE with args:')
    print(args)
    if not os.path.exists(args['outfolder']):
        os.makedirs(args['outfolder'])

    print('Load and Update the Monte Carlo')
    config = read_config()
    cfg_keys = config['keys']
    # NOTE(review): the nesting of the mc_cut call and of the `inds` cut was
    # reconstructed from a flattened source -- confirm that the cut is meant
    # to apply only to a freshly loaded sample.
    if mc is None:
        if args['mc'] is not None:
            mc_path = args['mc']
        else:
            mc_path = str(config['IC_MC']['path'])
        mc = np.load(str(mc_path))
        mc = mc_cut(mc, config)
    if inds is not None:
        print('Cut on given indices..')
        mc = mc[inds]

    settings, grid = model.setup_KDE(mc, cfg_keys)
    mc_conv = len(mc)
    print('Use {} mc events'.format(mc_conv))
    for key in settings:
        settings[key]['name'] = key
    for key in bws:
        settings[key]['bandwidth'] = bws[key]

    plaw = np.vectorize(powerlaw)

    # create binned pdf
    if args['weights'] == 'default':
        print('Use pre-calculated input weights')
        weights = mc['cur_weight']
    elif args['weights'] == 'pl':
        weights = mc[cfg_keys['ow']] * plaw(
            mc[cfg_keys['trueE']], phi0=phi0, gamma=args['gamma'])
    elif args['weights'] == 'conv':
        weights = mc[cfg_keys['conv']]
    elif args['weights'] == 'conv+pl':
        #diff_weight = mc['orig_OW'] * plaw(mc['trueE'], phi0=args['phi0'],
        #                                   gamma=args['gamma'])
        weights = mc[cfg_keys['conv']] + mc[cfg_keys['astro']]
        print('Rates [1/yr]:')
        print(np.sum(mc[cfg_keys['conv']]) * np.pi * 1e7)
        print(np.sum(mc[cfg_keys['astro']]) * np.pi * 1e7)
    else:
        print('{} is not a valid weights argument'.format(args['weights']))
        sys.exit(1)  # invalid input is an error, so exit non-zero

    inp_arr = [settings[key] for key in settings]
    # The fixed-bandwidth KDE is always built; in adaptive mode it only
    # serves as the seed pdf for the adaptive KDE.
    m_input = meerkat_input(inp_arr, weights, mc_conv=mc_conv)
    m_kde4d = meerkat_kde(m_input)
    if args['adaptive']:
        m_kde4d_fb = m_kde4d
        adtv_input = meerkat_input(inp_arr, weights,
                                   pdf_seed=m_kde4d_fb.kde, adaptive=True,
                                   mc_conv=mc_conv)
        m_kde4d = meerkat_kde(adtv_input)

    nbins = args['eval_bins']
    eval_grid = OrderedDict()
    if grid is None:
        grid = {}
    for key in settings:
        if key in grid:
            if isinstance(grid[key], (list, tuple)):
                # (low, high) pair: sample nbins points in between
                eval_grid[key] = np.linspace(grid[key][0], grid[key][1],
                                             nbins)
            elif isinstance(grid[key], np.ndarray):
                # explicit evaluation points are used as given
                eval_grid[key] = grid[key]
            else:
                raise TypeError(
                    'grid entry for {} must be a list, tuple or ndarray, '
                    'got {}'.format(key, type(grid[key]).__name__))
        else:
            eval_grid[key] = np.linspace(settings[key]['range'][0],
                                         settings[key]['range'][1], nbins)
    print(eval_grid.keys())

    out_bins = [eval_grid[key] for key in settings]
    coords = np.array(list(itertools.product(*out_bins)))
    bws = [settings[key]['bandwidth'] for key in settings]

    print('Evaluate KDEs:')
    pdf_vals = np.asarray([m_kde4d.eval_point(coord) for coord in coords])
    # Use the real number of points per axis; an explicit ndarray grid need
    # not have nbins entries.
    shpe = [len(axis_bins) for axis_bins in out_bins]
    pdf_vals = pdf_vals.reshape(shpe)

    if args['weights'] != 'pl':
        add_str = '_' + args['weights']
    else:
        add_str = '_' + 'g_{}'.format(args['gamma'])
    if args['save_str'] != '':
        add_str = add_str + '_' + args['save_str']

    odict = {
        # a plain list: dict key views cannot be pickled on Python 3
        'vars': list(eval_grid.keys()),
        'bins': out_bins,
        'coords': coords,
        'pdf_vals': pdf_vals,
        'bw': bws,
    }
    if not args['no_save']:
        out_path = os.path.join(args['outfolder'], mname + add_str + '.pkl')
        with open(out_path, 'wb') as fp:
            pickle.dump(odict, fp)

    t1 = time.time()
    print('Finished after {} minutes'.format((t1 - t0) / 60))
    return odict
# ----------------------------
# Parsing input args
# ----------------------------
parser = argparse.ArgumentParser(description="Download some HRRR data")
parser.add_argument(
    "--config", "-c", type=str, default="config.conf",
    help="Name of the config file to be read. Default is 'config.conf'.")
# Convert the namespace to a plain dict so options are read by key.
args = vars(parser.parse_args())

# ----------------------------
# Important step: Read the config file.
# ----------------------------
config = functions.read_config(args["config"])
print(config)

# No parameters?
if len(config.params) == 0:
    raise Exception("No parameters to download! Check config file.")

# ----------------------------
# Load available files
# ----------------------------
gribfiles = functions.get_gribfiles_on_server(config)
# Default to an empty list so that a missing "files" entry produces the
# intended error below rather than a TypeError from len(None).
if len(gribfiles.get("files", [])) == 0:
    raise Exception("No files found on server - stop execution.")

# ----------------------------
# Create output directory
# ----------------------------