Example #1
def main():
    """
    Run program
    """
    cmn.make_dir(cmn.LOG_DIR)
    logging.basicConfig(
        # filename=os.path.join(cmn.LOG_DIR, cmn.LOG_FILE_NAME % time.strftime(cmn.LOG_FILE_NAME_TIME)),
        handlers=[
            logging.FileHandler(
                os.path.join(
                    cmn.LOG_DIR, cmn.LOG_FILE_NAME %
                    time.strftime(cmn.LOG_FILE_NAME_TIME))),
            logging.StreamHandler()
        ],
        level=cmn.LOG_LEVEL,
        format=cmn.LOG_MSG_FORMAT,
        datefmt=cmn.LOG_TIME_FORMAT)

    start_time = time.time()
    logging.info("Starting program ...\n")

    if "-hmo" in sys.argv:
        logging.info(
            "Detected 'heatmap only' command line argument, program will use pre-existing combined PTEN correlation file.\n"
        )
    else:
        combine.run()
    heatmap.run()

    logging.info("Finished running program.")
    run_time = time.time() - start_time
    logging.info("Total run time: %s.\n" %
                 time.strftime('%H:%M:%S', time.gmtime(run_time)))
Example #2
def main():
    """
    Run program
    """
    cmn.make_dir(cmn.LOG_DIR)
    logging.basicConfig(
        # filename=os.path.join(cmn.LOG_DIR, cmn.LOG_FILE_NAME % time.strftime(cmn.LOG_FILE_NAME_TIME)),
        handlers=[
            logging.FileHandler(os.path.join(cmn.LOG_DIR, cmn.LOG_FILE_NAME % time.strftime(cmn.LOG_FILE_NAME_TIME))),
            logging.StreamHandler()
        ],
        level=cmn.LOG_LEVEL,
        format=cmn.LOG_MSG_FORMAT,
        datefmt=cmn.LOG_TIME_FORMAT
    )

    start_time = time.time()
    logging.info("Starting program ...")

    if "-man" in sys.argv:
        logging.info("Detected 'manifest only' command line argument, program will only generate file manifests.\n")
        file_lists.run()
    elif "-dlo" in sys.argv:
        logging.info("Detected 'download only' command line argument, program will use existing manifest lists.\n")
        files.run()
    else:
        logging.warning("No valid command line arguments, program will ignore arguments.\n")
        file_lists.run()
        files.run()

    logging.info("Finished running program.")
    run_time = time.time() - start_time
    logging.info("Total run time: %s.\n" % time.strftime('%H:%M:%S', time.gmtime(run_time)))
Example #3
def train(args):
    assert args.num_classes
    common.make_dir(args.checkout_dir)
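    # num_maps, pooling_size, filter_size and conn_dim are assumed to be
    # module-level hyperparameters defined elsewhere in this script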
    nnet = CNN(args.left_context + args.right_context + 1, args.feat_dim, num_maps, pooling_size,
            filter_size, conn_dim, args.num_classes)
    print(nnet)
    nnet.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)

    train_dataset = THCHS30(root=args.data_dir, data_type='train', left_context=args.left_context,
            right_context=args.right_context, model_type='cnn')
    train_loader  = data.DataLoader(dataset=train_dataset, batch_size=args.min_batch,
                                    shuffle=True, num_workers=6)

    test_dataset = THCHS30(root=args.data_dir, data_type='test', left_context=args.left_context,
            right_context=args.right_context, model_type='cnn')
    test_loader  = data.DataLoader(dataset=test_dataset, batch_size=args.min_batch,
                                    shuffle=True, num_workers=6)

    cross_validate(-1, nnet, test_dataset, test_loader) 
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet, criterion, optimizer, train_loader)
        cross_validate(epoch, nnet, test_dataset, test_loader) 
        th.save(nnet, common.join_path(args.checkout_dir, 'cnn.{}.pkl'.format(epoch + 1)))
Example #4
def train(args):
    common.make_dir(args.checkout_dir)
    # nnet
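    # hidden_layer, hidden_size and dropout are assumed to be module-level
    # constants defined elsewhere in this script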
    nnet = RNN((args.left_context + args.right_context + 1) * args.feat_dim, \
               hidden_layer, hidden_size, args.num_classes, dropout=dropout)
    print(nnet)
    nnet.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)

    train_dataset = THCHS30(root=args.data_dir, data_type='train')
    train_loader = data.DataLoader(dataset=train_dataset,
                                   batch_size=args.min_batch,
                                   shuffle=True)

    test_dataset = THCHS30(root=args.data_dir, data_type='test')
    test_loader = data.DataLoader(dataset=test_dataset,
                                  batch_size=args.min_batch,
                                  shuffle=True)

    cross_validate(-1, nnet, test_loader, test_dataset.num_frames)
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet,
                               criterion,
                               optimizer,
                               train_loader,
                               is_rnn=True)
        cross_validate(epoch, nnet, test_loader, test_dataset.num_frames)
        th.save(
            nnet,
            common.join_path(args.checkout_dir,
                             'rnn.{}.pkl'.format(epoch + 1)))
Example #5
def configure_modbus_logger(cfg, recycle_logs=True):
    """
    Configure the logger.

    Args:
        cfg (Namespace): The PUReST config namespace.
    """

    logger = logging.getLogger("modbus_tk")
    if isinstance(cfg, dict):
        cfg = Configuration(**cfg)

    if cfg.no_modbus_log:
        logger.setLevel(logging.ERROR)
        logger.addHandler(logging.NullHandler())
    else:
        logger.setLevel(logging.DEBUG)
        fmt = "%(asctime)s - %(levelname)s - %(module)s::%(funcName)s @ %(lineno)d - %(message)s"
        fmtr = logging.Formatter(fmt)

        if not cfg.no_modbus_console_log:
            sh = logging.StreamHandler()
            sh.setFormatter(fmtr)
            sh.setLevel(cfg.modbus_console_log_level.upper())
            logger.addHandler(sh)

        if not cfg.no_modbus_file_log:
            modbus_log = path(cfg.modbus_log)
            if recycle_logs:
                remove_file(modbus_log)
            make_dir(os.path.dirname(modbus_log))
            fh = logging.FileHandler(modbus_log)
            fh.setFormatter(fmtr)
            fh.setLevel(cfg.modbus_file_log_level.upper())
            logger.addHandler(fh)
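
A minimal usage sketch (hypothetical values; the configuration keys are inferred from the attributes read above, and Configuration, path, remove_file and make_dir are assumed to come from the surrounding module):

modbus_cfg = {
    "no_modbus_log": False,
    "no_modbus_console_log": False,
    "no_modbus_file_log": False,
    "modbus_console_log_level": "info",
    "modbus_file_log_level": "debug",
    "modbus_log": "logs/modbus.log",
}
configure_modbus_logger(modbus_cfg, recycle_logs=True)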
Example #6
    def __call__(self):
        import h5py

        self.logger.info('load training data: ' + self.infile)
        fin = h5py.File(self.infile, 'r')
        X_train = fin[self.xname][:]
        y_train = fin[self.yname][:]
        fin.close()

        valid_data = None
        if self.valid_file:
            self.logger.info('load validation data: ' + self.valid_file)
            fin = h5py.File(self.valid_file, 'r')
            X_valid = fin[self.valid_xname][:]
            y_valid = fin[self.valid_yname][:]
            fin.close()
            valid_data = (X_valid, y_valid)

        window_size = X_train.shape[1]
        from keras.optimizers import RMSprop
        optimizer = RMSprop(lr=self.learning_rate)
        # load model
        # the model script may override the variables `optimizer` and `loss`
        regression = self.regression
        if self.regression:
            loss = 'mean_squared_error'
            metrics = ['mean_squared_error']
        else:
            loss = 'binary_crossentropy'
            metrics = ['accuracy']

        with open(self.model_script, 'r') as f:
            # the script is expected to define `model`
            exec(compile(f.read(), self.model_script, 'exec'))

        model.compile(optimizer=optimizer,
                    loss=loss,
                    metrics=metrics)
        model.summary()

        callbacks = []
        if self.tensorboard_log_dir:
            from keras.callbacks import TensorBoard
            callbacks.append(TensorBoard(log_dir=self.tensorboard_log_dir))
        if self.keras_log is not None:
            self.logger.info('open CSV log file: {}'.format(self.keras_log))
            make_dir(os.path.dirname(self.keras_log))
            callbacks.append(keras.callbacks.CSVLogger(self.keras_log))

        self.logger.info('train model')
        model.fit(X_train, y_train,
            batch_size=self.batch_size, epochs=self.epochs,
            callbacks=callbacks, verbose=self.keras_verbose,
            validation_data=valid_data)
        self.logger.info('save model: {}'.format(self.model_file))
        prepare_output_file(self.model_file)
        model.save(self.model_file)
Example #7
    def __call__(self):
        import h5py
        self.logger.info('load model: {}'.format(self.model_file))
        if self.model_format == 'keras':
            model = keras.models.load_model(self.model_file)
        elif self.model_format == 'sklearn':
            import cPickle
            with open(self.model_file, 'rb') as f:
                model = cPickle.load(f)

        self.logger.info('load data: {}'.format(self.infile))
        fin = h5py.File(self.infile, 'r')
        X_test = fin[self.xname][:]
        y_test = fin[self.yname][:]
        fin.close()

        self.logger.info('run the model')
        if self.model_format == 'keras':
            y_pred = model.predict(X_test, batch_size=self.batch_size)
        elif self.model_format == 'sklearn':
            y_pred = model.predict(X_test)

        y_pred = np.squeeze(y_pred)
        if self.swap_labels:
            self.logger.info('swap labels')
            y_pred = 1 - y_pred
        y_pred_labels = (y_pred >= self.cutoff).astype('int32')

        # ignore NaNs in y_test
        y_test = y_test.flatten()
        y_pred = y_pred.flatten()
        y_pred_labels = y_pred_labels.flatten()
        not_nan_mask = np.logical_not(np.isnan(y_test))
        y_test = y_test[not_nan_mask]
        y_pred = y_pred[not_nan_mask]
        y_pred_labels = y_pred_labels[not_nan_mask]

        scores = {}
        for metric in self.metrics:
            # y_pred is an array of continuous scores
            scorer = get_scorer(metric)
            if metric == 'roc_auc':
                scores[metric] = scorer(y_test, y_pred)
            else:
                scores[metric] = scorer(y_test, y_pred_labels)
            self.logger.info('metric {} = {}'.format(metric, scores[metric]))
        if self.outfile is not None:
            self.logger.info('save file: {}'.format(self.outfile))
            make_dir(os.path.dirname(self.outfile))
            fout = h5py.File(self.outfile, 'w')
            fout.create_dataset('y_true', data=y_test)
            fout.create_dataset('y_pred', data=y_pred)
            fout.create_dataset('y_pred_labels', data=y_pred_labels)
            grp = fout.create_group('metrics')
            for metric in self.metrics:
                grp.create_dataset(metric, data=scores[metric])
            fout.close()
Example #8
def create_channel_folder(work_dir, channel_id):
    global error_creator
    error_creator = 0
    try:
        # only create the folder if the renaming step (tracked by the
        # module-level error_renamer flag) did not fail
        if error_renamer == 0:
            common.make_dir(os.path.join(work_dir, channel_id))
        else:
            return
    except Exception:
        error_creator = 1
Example #9
def run():
    """
    Run the downloading script to download files listed in previously
    acquired manifest lists.
    """

    # number of the file currently being requested/downloaded (a one-element
    # list so it can be updated in place by the download functions)
    file_number = [1]

    cmn.make_dir(cmn.DL_DIR)
    download_files(file_number)
Example #10
 def __call__(self):
     model_weights = h5py.File(self.model_file, 'r')['/model_weights/dense_1/dense_1_W:0'][:]
     # integer division so window_size is an int that can be used in reshape()
     window_size = model_weights.shape[0] // len(self.alphabet)
     model_weights = model_weights.reshape((window_size, 4))
     fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(12, 9), sharey=True)
     for i in range(len(self.alphabet)):
         ax = axes[i]
         ax.bar(np.arange(window_size), model_weights[:, i], color='b', edgecolor='none')
         ax.set_xticks(np.arange(window_size, step=5))
         ax.set_xlim(0, window_size)
         ax.set_title('Logistic regression weights ({})'.format(self.alphabet[i]))
         ax.set_ylabel('Weight')
     self.logger.info('savefig: {}'.format(self.outfile))
     make_dir(os.path.dirname(self.outfile))
     plt.tight_layout()
     plt.savefig(self.outfile, dpi=150, bbox_inches='tight')
Example #11
def get_video_info(video_id):
    video_info_list = []
    items = youtube.videos().list(part="snippet, contentDetails",
                                  id=video_id).execute()["items"][0]
    resp_save_dest = "data/resp/yt/v/"
    timestamp = common.now_iso(1)  # UTC+0
    common.make_dir(resp_save_dest)
    common.write_json(resp_save_dest + timestamp + " " + video_id + ".json",
                      items)
    video_info_list.append(items["snippet"]["channelTitle"])  # 0
    video_info_list.append(items["snippet"]["channelId"])  # 1
    video_info_list.append(items["snippet"]["publishedAt"])  # 2
    video_info_list.append(items["snippet"]["title"])  # 3
    video_info_list.append(items["snippet"]["description"])  # 4
    video_info_list.append(items["contentDetails"]["duration"])  # 5
    return video_info_list  # Throw video_info_list to fileproc.
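
A minimal usage sketch (the video ID is a placeholder; youtube and common are assumed to be initialized at module level as above):

(channel_title, channel_id, published_at,
 title, description, duration) = get_video_info("VIDEO_ID")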
Example #12
 def __call__(self):
     self.treatment = read_hdf5(self.treatment_file)
     self.control = read_hdf5(self.control_file)
     name_counts = np.unique(np.concatenate(
         [self.treatment['name'], self.control['name']]),
                             return_counts=True)
     common_names = name_counts[0][name_counts[1] == 2]
     if self.names is None:
         self.names = common_names[:self.max_plots]
     from matplotlib.backends.backend_pdf import PdfPages
     self.logger.info('open pdf file: {}'.format(self.outfile))
     make_dir(os.path.dirname(self.outfile))
     with PdfPages(self.outfile) as pdf:
         for seqname in self.names:
             self.logger.info('plot track: {}'.format(seqname))
             pdf.savefig(self.plot_tracks(seqname))
Example #13
 def analyze(self):
     self.icshape = read_hdf5(self.icshape_file)
     self.background = read_hdf5(self.background_file)
     self.target = read_hdf5(self.target_file)
     name_counts = np.unique(np.concatenate([
         self.icshape['name'], self.background['name'], self.target['name']
     ]),
                             return_counts=True)
     common_names = name_counts[0][name_counts[1] == 3]
     if self.names is None:
         self.names = common_names[:self.max_plots]
     from matplotlib.backends.backend_pdf import PdfPages
     self.logger.info('open pdf file: {}'.format(self.outfile))
     make_dir(os.path.dirname(self.outfile))
     with PdfPages(self.outfile) as pdf:
         for seqname in self.names:
             self.logger.info('plot track: {}'.format(seqname))
             pdf.savefig(self.plot_tracks(seqname))
Example #14
def main():
    """
    Run program
    """
    cmn.make_dir(cmn.LOG_DIR)
    logging.basicConfig(
        # filename=os.path.join(cmn.LOG_DIR, cmn.LOG_FILE_NAME % time.strftime(cmn.LOG_FILE_NAME_TIME)),
        handlers=[
            logging.FileHandler(
                os.path.join(
                    cmn.LOG_DIR, cmn.LOG_FILE_NAME %
                    time.strftime(cmn.LOG_FILE_NAME_TIME))),
            logging.StreamHandler()
        ],
        level=cmn.LOG_LEVEL,
        format=cmn.LOG_MSG_FORMAT,
        datefmt=cmn.LOG_TIME_FORMAT)

    start_time = time.time()
    logging.info("Starting program ...")

    r, h = False, True

    if "-r" in sys.argv:
        logging.info(
            "Detected 'raw data' command line argument, program will integrate raw counts (non-normalized).\n"
        )
        r = True
    if "-nh" in sys.argv:
        logging.info(
            "Detected 'no headers' command line argument, program will not add headers to integrated files.\n"
        )
        h = False
    elif not r:
        logging.warning(
            "No valid command line arguments, program will ignore arguments.\n"
        )

    integrate.run(r, h)

    logging.info("Finished running program.")
    run_time = time.time() - start_time
    logging.info("Total run time: %s.\n" %
                 time.strftime('%H:%M:%S', time.gmtime(run_time)))
Example #15
def run():
    """
    Run the stage that generates the file manifest-list files
    """
    # create required directory
    cmn.make_dir(cmn.FILE_LIST_DIR)

    # generate RNA file lists
    n_rna = gen_file_lists("RNA")

    # generate miRNA file lists
    n_mirna = gen_file_lists("miRNA")

    logging.info("manifest list files generated in '%s' :" % cmn.FILE_LIST_DIR)
    logging.info(
        "%s RNA-seq manifest lists generated, with %s manifests in each list."
        % (n_rna, cmn.FILES_PER_LIST))
    logging.info(
        "%s miRNA-seq manifest lists generated, with %s manifests in each list.\n"
        % (n_mirna, cmn.FILES_PER_LIST))
Example #16
 def __call__(self):
     self.logger.info('load DMS-seq scores from: {}'.format(
         self.dmsseq_file))
     dmsseq = GenomicData(self.dmsseq_file, ['dmsseq'])
     scores = dmsseq['dmsseq']
     cutoff1 = np.percentile(scores, self.percentile)
     cutoff2 = np.percentile(scores, 100 - self.percentile)
     self.logger.info('DMS-seq score cutoffs: {}-{}'.format(
         cutoff1, cutoff2))
     discard = np.logical_and(cutoff1 < scores, scores < cutoff2)
     scores[(scores <= cutoff1) & np.logical_not(discard)] = 0
     scores[(scores >= cutoff2) & np.logical_not(discard)] = 1
     fasta_f = IndexedFastaReader(self.sequence_file)
     # calculate base distribution
     self.logger.info('calculate base distribution')
     self.offsets = range(-self.max_offset, self.max_offset + 1)
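     # base_dist[i_offset, label, base]: counts of each base in self.alphabet
     # at the given offset around positions labelled 0 or 1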
     base_dist = np.zeros([len(self.offsets), 2, 4], dtype='int64')
     progress = ProgressBar(len(dmsseq.names), title='')
     for name in dmsseq.names:
         seq = np.frombuffer(fasta_f[name], dtype='S1')
         values = dmsseq.feature('dmsseq', name)
         ind_valid = np.nonzero(np.logical_not(np.isnan(values)))[0]
         ind_one_ts = np.nonzero(values == 1)[0]
         ind_zero_ts = np.nonzero(values == 0)[0]
         for i_offset, offset in enumerate(self.offsets):
             ind_one = ind_one_ts + offset
             ind_one = ind_one[(ind_one >= 0) & (ind_one < len(seq))]
             ind_zero = ind_zero_ts + offset
             ind_zero = ind_zero[(ind_zero >= 0) & (ind_zero < len(seq))]
             for i in range(len(self.alphabet)):
                 if len(ind_zero) > 0:
                     base_dist[i_offset, 0, i] += (
                         seq[ind_zero] == self.alphabet[i]).sum()
                 if len(ind_one) > 0:
                     base_dist[i_offset, 1, i] += (
                         seq[ind_one] == self.alphabet[i]).sum()
         progress.update()
     progress.finish()
     fasta_f.close()
     base_dist = base_dist.astype('float64')
     # plot
     fig, axes = plt.subplots(nrows=2,
                              ncols=len(self.offsets),
                              figsize=(20, 4),
                              sharey=True)
     fig.tight_layout()
     for i, offset in enumerate(self.offsets):
         for label in (0, 1):
             self.logger.debug('plot_base_dist: {}, {}'.format(
                 label, offset))
             base_dist[i, label, :] /= base_dist[i, label, :].sum()
             ax = axes[label, i]
             ax.bar(np.arange(len(self.alphabet)),
                    base_dist[i, label, :],
                    color='k',
                    edgecolor='none',
                    align='center')
             ax.set_xticks(np.arange(len(self.alphabet)))
             ax.set_xticklabels(self.alphabet)
             ax.set_ylabel('Density')
             ax.set_title('({}, {})'.format(label, offset))
     self.logger.info('savefig: {}'.format(self.outfile))
     make_dir(os.path.dirname(self.outfile))
     plt.savefig(self.outfile, dpi=150, bbox_inches='tight')
Example #17
def _download_files(file_list, file_number):
    """
    Download files listed in one manifest list object

    :param file_list: Object a json object of the list from one manifest-list file
    :param file_number: List contains 1 element that is the current number/index of the file being downloaded
    """
    # 'file' here is a file manifest, or information of a file
    for file in file_list["data"]["hits"]:
        logging.info("Processing file #%s ..." % file_number[0])
        '''
        This is where the organizational structure of the directories for the
        downloaded files is defined, using sample_manifest.json as a
        guideline. The directory path for each file is built up and then
        created if necessary.
        '''
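        # resulting layout (illustrative): <DL_DIR>/<primary_site>/<gender>/<sample_id>/<file_name>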
        file_dir_path = [cmn.DL_DIR]

        try:
            file_dir_path.append(file["cases"][0]["project"]["primary_site"])
        except KeyError:
            logging.warning(
                "File manifest did not provide attribute 'primary_site' for tissue sample."
            )
            file_dir_path.append("unknown")

        try:
            file_dir_path.append(file["cases"][0]["demographic"]["gender"])
        except KeyError:
            logging.warning(
                "File manifest did not provide attribute 'gender' for tissue sample."
            )
            file_dir_path.append("unknown")

        try:
            file_dir_path.append(file["cases"][0]["samples"][0]["sample_id"])
        except KeyError:
            logging.warning(
                "File manifest did not provide attribute 'sample_id' for tissue sample."
            )
            file_dir_path.append("unknown")

        file_dir = os.path.join(*file_dir_path)
        cmn.make_dir(file_dir)

        file_path = os.path.join(file_dir, file["file_name"])

        gzip_archive = False
        if file_path.endswith('.gz') or file_path.endswith('.GZ'):
            logging.info("File is a gzip archive.")
            gzip_archive = True

        if (not gzip_archive and not os.path.isfile(file_path)) or (
                gzip_archive
                and not os.path.isfile('%s.txt' % file_path[:-3])):

            logging.info("Requesting file '%s'..." % file["file_name"])

            # send a GET request for this single file using its UUID
            r = requests.get("%s/%s" % (URL, file["file_id"]))
            logging.info(r)
            if r.status_code == requests.codes.ok:
                logging.info("Request ok, downloading file ...")

                logging.info("Writing file to '%s' ..." % file_path)

                # write HTTP response to the file as a byte stream
                with open(file_path, 'wb') as file_out:
                    for chunk in r.iter_content(chunk_size=128):
                        file_out.write(chunk)

                if gzip_archive:
                    logging.info("Extracting archive to '%s' ..." %
                                 ('%s.txt' % file_path[:-3]))
                    with gzip.open(file_path, 'rb') as f_in, open(
                            '%s.txt' % file_path[:-3], 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)
                    logging.info(
                        "Text file extracted, removing gzip archive ...")
                    os.remove(file_path)
                    logging.info("Gzip archive removed.")

                logging.info("File downloaded.\n")

            else:
                logging.error("Request failed, skipping file.\n")

        else:
            if gzip_archive:
                logging.info("File '%s' already exists.\n" % '%s.txt' %
                             file_path[:-3])
            else:
                logging.info("File '%s' already exists.\n" % file_path)

        file_number[0] += 1
Example #18
def combine_corr_files():
    """
    """
    combined_file_path = os.path.join(cmn.DL_DIR, cmn.PTEN_CORR_FNAME)

    n = 30
    m = -30
    gdf = pd.read_csv(cmn.GENE_REF_FILE, sep="\t", index_col=0)
    gene_list = [l[0] for l in gdf.values[:n].tolist()]
    gene_names = gdf.index[:n].tolist()

    with open(combined_file_path, 'w') as out_file:
        out_file.write("\n".join(["0"] + list(gene_names)))

    cdf = pd.read_csv(combined_file_path, sep="\t", index_col=0)
    i = 0

    for cancer_type in cmn.list_dir(cmn.DL_DIR):
        if cancer_type not in cmn.SKIP_CTYPE:
            for gender in cmn.list_dir(os.path.join(cmn.DL_DIR, cancer_type)):
                if gender not in cmn.SKIP_GENDER:
                    logging.info(
                        "Reading correlation files for '%s' > '%s' ..." %
                        (cancer_type, gender))

                    samples_path = os.path.join(cmn.DL_DIR, cancer_type,
                                                gender)

                    corr_file = cmn.CORR_FNAME % (cancer_type.lower(),
                                                  gender.lower(), "rna",
                                                  "normalized")
                    corr_file_path = os.path.join(samples_path, corr_file)

                    if os.path.isfile(corr_file_path):
                        logging.info(
                            "Found correlation file, processing ...\n")

                        df = pd.read_csv(corr_file_path, sep="\t", index_col=0)
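                        # index entries in the correlation file look like
                        # '<gene_code>.<version>'; map code -> version so
                        # genes can be matched regardless of version suffix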
                        ver_dict = {
                            key: value
                            for (key, value) in
                            [cc.split(".") for cc in df.index.values]
                        }

                        entries = [
                            abs(
                                float(df.loc[".".join(
                                    (gene_code, ver_dict[gene_code]))][0]))
                            if gene_code in ver_dict else 0.0
                            for gene_code in gene_list
                        ]
                        cdf.insert(i, "%s-%s" % (cancer_type, gender), entries)
                        i += 1

                        logging.info("Done.\n")

                    else:
                        logging.warning(
                            "No correlation file found for '%s' > '%s'.\n" %
                            (cancer_type, gender))

    logging.info("Writing file ...")
    cdf.to_csv(combined_file_path, sep="\t")

    cmn.make_dir("ref")
    cdf.to_csv(os.path.join("ref", cmn.PTEN_CORR_FNAME), sep="\t")