Example #1
def make_listing_clusters():
    # For now: rebuild the feature set first (deferred imports keep module load light).
    import features

    # Make big and small features (stored in the database);
    # really only the fancy features are needed here.
    features.main()

    # Cluster the listings.
    import listing_cluster
    listing_cluster.main()
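A minimal entry point for this example might look like the following; the __main__ guard is an illustration, not part of the original module:

if __name__ == '__main__':
    # Rebuild features, then recluster the listings.
    make_listing_clusters()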
Example #2
import os
import time

def stream_handler(message):
    print(message["path"])  # e.g. /-K7yGTTEp7O549EzTYtI
    print(message["data"])  # e.g. {'title': 'Pyrebase', 'body': 'etc...'}
    entry = message["path"]
    if message and message["data"] is not None:
        # Proceed for any non-root path that does not end in '/Pm'
        # (the original compared the last three characters in reverse order).
        if len(entry) > 1 and not entry.endswith('/Pm'):
            newpath = r"C:\Users\HP\Desktop\%s" % message["path"]
            if not os.path.exists(newpath):
                os.makedirs(newpath)
                time.sleep(2)
            # 'st' is a Pyrebase storage handle defined elsewhere in the module.
            st.child("uploads/air/394.jpg").download(
                r"C:\Users\HP\Desktop\%s\1.jpg" % message["path"])
            features.main()
            nearest_loc.main()
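For context, a handler like this is attached to a Pyrebase stream. A minimal sketch of the wiring, assuming a standard Pyrebase config (the credential values are placeholders):

import pyrebase

config = {
    "apiKey": "...",            # placeholder credentials
    "authDomain": "...",
    "databaseURL": "...",
    "storageBucket": "...",
}
firebase = pyrebase.initialize_app(config)
db = firebase.database()
st = firebase.storage()  # the storage handle used as `st` above

# Every write under this node invokes stream_handler with a message dict.
my_stream = db.child("posts").stream(stream_handler)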
Example #3
import sys

import joblib  # in older scikit-learn: from sklearn.externals import joblib
import features

def main():
    url = sys.argv[1]
    features_test = features.main(url)

    clf = joblib.load('random_forest.pkl')

    pred = clf.predict(features_test)

    if int(pred[0]) == 1:
        print("This website is safe")
    elif int(pred[0]) == -1:
        print("This website is not safe!")
Example #4
import sys

import joblib
import features

def main():
    url = sys.argv[1]

    features_test = features.main(url)

    clf = joblib.load('random_forest.pkl')

    pred = clf.predict(features_test)
    prob = clf.predict_proba(features_test)  # class probabilities (computed but unused)

    if int(pred[0]) == 1:
        print("This is a safe website.")
    elif int(pred[0]) == -1:
        print("This is a phishing website..!")
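Both classifiers above assume a pre-trained random_forest.pkl on disk. A minimal training-and-persistence sketch, using stand-in data since the real feature matrix comes from features.main (all names below are illustrative):

import joblib
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Stand-in data; the real project builds its matrix from features.main(url).
X, y = make_classification(n_samples=200, n_features=10, random_state=0)
y = np.where(y == 1, 1, -1)  # match the +1 (safe) / -1 (phishing) convention

clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X, y)
joblib.dump(clf, 'random_forest.pkl')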
Example #5
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
###
import evaluation
import check_tests as ct
import features
###
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import roc_curve, auc


train = pd.read_csv('../data/training.csv', index_col='id')
subset = [1, 2, 3, 4, 5]
variables = train.columns[subset]

trained_model = features.main('rf', variables)
ct.agreement(trained_model, variables)
ct.correlation(trained_model, variables)
ct.weightedAuc(trained_model, variables, train)
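roc_curve and auc are imported above but never used; a typical follow-up evaluation, assuming trained_model supports predict_proba and that X_val, y_val are a held-out split (both names are assumptions):

scores = trained_model.predict_proba(X_val)[:, 1]  # probability of the positive class
fpr, tpr, thresholds = roc_curve(y_val, scores)
print('AUC: %.3f' % auc(fpr, tpr))
plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.show()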
Example #6
def extractFeatures_given_gff(config, gff_infile, outdir, has_mirna,
                              is_consider_corr):
    cparser = SafeConfigParser()
    cparser.read(config)

    tc_config = cparser.get('configs', 'tcconfig')
    m_mirna = cparser.get('correlation', 'srnaseqmatrix')

    f_fasta = cparser.get('genome', 'fasta')
    f_chromsizes = cparser.get('genome', 'chromsizes')
    d_phastcons = cparser.get('cons', 'phastcons')
    TRAP = cparser.get('tata', 'trap')
    f_psemmatrix = cparser.get('tata', 'psem')

    mirbase_gff2 = cparser.get('mirbase', 'gff2')
    corrmethod = cparser.get('correlation', 'corrmethod')

    ## PART1: tc normalization
    ## 1a. setup infile
    outdir_tc = os.path.join(outdir, 'tc-norm')
    f1_pos = os.path.join(outdir, 'f1_pos.txt')
    ensure_dir(outdir_tc)

    ## 1b. reformat infile so that it can be read by tc-quantify
    _reformat_infile_gff2tcnorm(gff_infile, f1_pos)

    ## 1c. run
    fo_bed = tc_normalization.main(tc_config, f1_pos, outdir_tc)

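    ## map 'chrom.start.stop.strand' -> normalized count (column 7 of the
    ## tc-norm BED); the key is parsed out of the BED name field below.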
    ncount_dict = {}
    with open(fo_bed) as f:
        for line in f:
            l = line.strip().split('\t')
            try:
                _, chrom, start, _, stop, strand = re.split('[r:.,]', l[3])
                pos = '.'.join([chrom, start, stop, strand])
                ncount_dict[pos] = l[6]
            except ValueError:
                print '#[tcBedSpltErr]: %s' % line,

    ## 1d. setup outfile
    f_rle = re.sub('max_tpm.bed$', 'tpm_rle.matrix', fo_bed)

    tcparser = SafeConfigParser()
    tcparser.read(tc_config)
    f_ids = tcparser.get('tc_normalization', 'ids')

    ## PART2: compute cpg, cons, tata ...
    outdir_seqfeatures = os.path.join(outdir, 'seqfeatures/')
    ensure_dir(outdir_seqfeatures)

    gff1kb_infile = os.path.join(outdir_seqfeatures, 'infile_1kbseq.gff')
    gff_1kbfeatures = os.path.join(outdir_seqfeatures, 'features_1kbseq.gff')

    _reformat_tss_to_1kb(f1_pos, gff1kb_infile)

    features.main(gff1kb_infile, outdir_seqfeatures, f_fasta, f_chromsizes,
                  d_phastcons, TRAP, f_psemmatrix, gff_1kbfeatures)

    ## PART3: compute mprox ...
    outdir_tmp = os.path.join(outdir, 'intermediates')
    ensure_dir(outdir_tmp, False)

    gff_mproxfeatures = os.path.join(outdir_tmp, 'features_mprox.gff')
    gff_ufeat1 = os.path.join(outdir_tmp, 'features.1kb.mprox.gff')

    if has_mirna:
        _interpret_mprox(gff_infile, mirbase_gff2, gff_mproxfeatures)
    else:
        mirna_proximity.main(gff1kb_infile, mirbase_gff2, gff_mproxfeatures)

    gff_unify_features.main(gff_1kbfeatures, gff_mproxfeatures, 'mirna_prox',
                            '0', gff_ufeat1, True)

    ## PART4: compute corr
    if is_consider_corr:
        ## correlation setup:
        outdir_corr = os.path.join(outdir, 'corr')
        ensure_dir(outdir_corr, False)

        gff_mirna = os.path.join(outdir_corr, '4corr_mirna.gff')
        gff_tss = os.path.join(outdir_corr, '4corr_tss.gff')
        pair_pos = os.path.join(outdir_corr, '4corrPair_row_pos_tss-mirna.txt')
        pair_sample = os.path.join(outdir_corr,
                                   '4corrPair_col_sample_CAGE-sRNAseq.txt')
        fo_corr = os.path.join(outdir_corr,
                               'features_correlation-%s.gff' % corrmethod)
        gff_ufeat2 = os.path.join(outdir_tmp, 'features.1kb.mprox.corr.gff')

        ## position pair:
        correlation._find_miRNA_pos(m_mirna, mirbase_gff2, gff_mirna)
        correlation._get_tss_pos(f1_pos, gff_tss)
        if has_mirna:
            _interpret_tss_mirna_pairings(gff_infile, gff_tss, gff_mirna,
                                          pair_pos)
        else:
            correlation._get_tss_mirna_pairings(gff_tss, gff_mirna, pair_pos)

        ## sample pair:
        srnaseq_index = correlation._index_srnaseq(m_mirna)
        cage_index = _index_tcnorm(f_ids)
        correlation._get_sample_pairings(cage_index, srnaseq_index,
                                         pair_sample)

        ## compute correlation:
        correlation._compute_correlation(pair_pos, pair_sample, f_rle, m_mirna,
                                         fo_corr, corrmethod, '.')

        gff_unify_features.main(gff_ufeat1, fo_corr, 'corr', '0', gff_ufeat2,
                                True)

        gff_ufeat = gff_ufeat2
    else:
        gff_ufeat = gff_ufeat1

    findex = _index_feat(gff_ufeat, has_mirna)

    ## PART5: start consolidating features ...
    gff_allfeatures = os.path.join(outdir, 'features.gff')
    with open(gff_allfeatures, 'w') as out:
        with open(gff_infile) as f:
            for l in f:
                chrom, _, _, start, stop, _, strand, _, mirna = l.strip(
                ).split('\t')
                mirna = mirna.lower()

                ## setting ids...
                tssid = '.'.join([chrom, start, stop, strand])

                ## getting info...
                try:
                    ncount = ncount_dict[tssid]
                except KeyError:
                    ncount = '0'

                if tssid in findex:
                    for n in findex[tssid]:
                        if has_mirna:
                            m, n = n.split(':')
                            if m != mirna: continue

                        newline = linecache.getline(gff_ufeat, int(n))
                        newline = newline.split('\t')

                        newline[2] = mirna
                        newline[5] = ncount

                        out.write('\t'.join(newline))
    return gff_allfeatures
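ensure_dir is a project helper that is not shown in these examples. Judging from its call sites (a path, plus an optional boolean in some calls), a plausible reconstruction is the sketch below; the flag's exact meaning is an assumption:

import os
import sys

def ensure_dir(path, exit_if_exists=True):
    # Hypothetical reconstruction: create `path` if missing; optionally
    # treat a pre-existing directory as a fatal error.
    if os.path.exists(path):
        if exit_if_exists:
            sys.exit('## ERROR: "%s" already exists' % path)
    else:
        os.makedirs(path)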
Example #7
import os

import features
import gengraph
import numpy as np
from numpy import loadtxt
from collections import defaultdict


def gensif(temp, f):
    infile2 = os.path.dirname(os.path.abspath(__file__)) + '/csvFiles/pred.csv'
    fh1 = open(infile2, 'w')
    for k in temp:
        k = int(k)
        print >> fh1, str(k)
    fh1.close()
    gengraph.main(f)


features.main()
tex = loadtxt('/home/aditya/Project/csvFiles/trainNLP.csv', delimiter=',')
tex_dev = loadtxt('/home/aditya/Project/csvFiles/devNLP.csv', delimiter=',')
tex_test = loadtxt('/home/aditya/Project/csvFiles/testNLP.csv', delimiter=',')
vals = tex.shape
vals_dev = tex_dev.shape
vals_test = tex_test.shape
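# Note: the last column of train/dev is the label; the feature slices stop
# at vals[1] - 2, so the column just before the label is also dropped
# (possibly an ID column).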
ty = tex[:, vals[1] - 1]
tx = tex[:, 0:vals[1] - 2]
ty_dev = tex_dev[:, vals_dev[1] - 1]
tx_dev = tex_dev[:, 0:vals_dev[1] - 2]
tx_test = tex_test[:, 0:vals_test[1] - 1]
#clf = svm.SVC()
#clf.fit(tx, ty)
newtext_train = './GRN.py --a1-dir /home/aditya/Project/train --a2-dir /home/aditya/Project/train --pred-sif /home/aditya/Project/output_train.sif /home/aditya/Project/train/PMID-*.txt'
newtext_dev = './GRN.py --a1-dir /home/aditya/Project/dev --a2-dir /home/aditya/Project/dev --pred-sif /home/aditya/Project/output_dev.sif /home/aditya/Project/dev/PMID-*.txt'
Example #8
def main(f_config, gff_cage, is_gff, outdir, make_plots):
    cparser = SafeConfigParser()
    cparser.read(f_config)

    in_bname = os.path.basename(gff_cage)

    if outdir is None:
        outdir = 'promi2_outdir_' + in_bname + '_' + random_string(6)
    ensure_dir(outdir, False)

    f_param = cparser.get('promi2', 'params')
    listoffeatures = cparser.get('promi2', 'features')
    listoffeatures = listoffeatures.split(',')
    if 'corr' in listoffeatures:
        is_consider_corr = True
        corrmethod = cparser.get('correlation', 'corrmethod')
    else:
        is_consider_corr = False

    ## PART1: Feature extraction
    if not is_gff:
        ## feature extraction: cpg, cons, tata (features.py)
        outdir_seqfeatures = os.path.join(outdir, 'seqfeatures')
        ensure_dir(outdir_seqfeatures, False)

        gff_1kbfeatures = os.path.join(outdir_seqfeatures,
                                       'features_1kbseq.gff')

        f_fasta = cparser.get('genome', 'fasta')
        f_chromsizes = cparser.get('genome', 'chromsizes')
        d_phastcons = cparser.get('cons', 'phastcons')
        TRAP = cparser.get('tata', 'trap')
        f_psemmatrix = cparser.get('tata', 'psem')

        features.main(gff_cage, outdir_seqfeatures, f_fasta, f_chromsizes,
                      d_phastcons, TRAP, f_psemmatrix, gff_1kbfeatures)

        ## feature extraction: mirna_proximity (mirna_proximity.py)
        outdir_mprox = os.path.join(outdir, 'mprox')
        ensure_dir(outdir_mprox, False)

        gff_mirnaprox = os.path.join(outdir_mprox, 'features_mirnaprox.gff')

        gff_mirna = cparser.get('mirbase', 'gff2')

        mirna_proximity.main(gff_cage, gff_mirna, gff_mirnaprox)

        ## merge extracted features (gff_unify_features.py)
        gff_features = os.path.join(outdir, 'Features.1kb.mprox.' + in_bname)
        gff_unify_features.main(gff_1kbfeatures, gff_mirnaprox, 'mirna_prox',
                                '0', gff_features)

        if is_consider_corr:
            ## merge extracted features (gff_unify_features.py) after compute correlation
            gff_features_corr = os.path.join(
                outdir, 'Features.1kb.mprox.%s.%s' % (corrmethod, in_bname))

            outdir_corr = os.path.join(outdir, 'corr')

            m_mirna = cparser.get('correlation', 'srnaseqmatrix')
            m_tss = cparser.get('correlation', 'cageseqmatrix')

            gff_corr = correlation.main(gff_mirna, m_mirna, m_tss, corrmethod,
                                        outdir_corr)
            gff_unify_features.main(gff_features, gff_corr, 'corr', '0',
                                    gff_features_corr)

            gff_allfeatures = gff_features_corr
        else:
            gff_allfeatures = gff_features
    else:
        gff_allfeatures = gff_cage
        with open(gff_allfeatures) as f:
            l = f.readline().split('\t')
            if ':' not in l[7]:
                sys.exit('ERROR: this is not a features.gff formatted file')

    ## PART2: extract parameters & run promirna
    f_prediction = os.path.join(outdir, 'Predictions.' + in_bname + '.txt')
    print 'COMPUTING: "%s"...' % f_prediction
    promi2(f_param, listoffeatures, gff_allfeatures, f_prediction)

    ## PART3: plots
    if make_plots:
        plotdir = os.path.join(outdir, 'plots')
        ensure_dir(plotdir, False)
        plots.main(f_prediction, plotdir, f_config)
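random_string is likewise a helper defined elsewhere; a plausible sketch consistent with the call random_string(6) above (the alphabet is an assumption):

import random
import string

def random_string(n):
    # Hypothetical reconstruction: n random alphanumeric characters,
    # used to make the default output directory name unique.
    return ''.join(random.choice(string.ascii_letters + string.digits)
                   for _ in range(n))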
Example #9
import os
import re

import gengraph
import features
from numpy import loadtxt
from sklearn import preprocessing

def gensif(temp):
    infile2 = os.path.dirname(os.path.abspath(__file__)) + '/csvFiles/pred.csv'
    fh1 = open(infile2, 'w')
    for k in temp:
        k = int(k)
        print >> fh1, str(k)
    fh1.close()
    gengraph.main()

lb = preprocessing.L  # attribute name truncated in the source
features.main()
tex = loadtxt('/home/aditya/Project/csvFiles/trainNLP.csv', delimiter=',')
tex_dev = loadtxt('/home/aditya/Project/csvFiles/devNLP.csv', delimiter=',')
tex_test = loadtxt('/home/aditya/Project/csvFiles/testNLP.csv', delimiter=',')
vals = tex.shape
vals_dev = tex_dev.shape
vals_test = tex_test.shape
ty = tex[:, vals[1] - 1]
tx = tex[:, 0:vals[1] - 2]
ty_dev = tex_dev[:, vals_dev[1] - 1]
tx_dev = tex_dev[:, 0:vals_dev[1] - 2]
tx_test = tex_test[:, 0:vals_test[1] - 1]
#clf = svm.SVC()
#clf.fit(tx, ty)
newtext = './GRN.py --a1-dir /home/aditya/Project/dev --a2-dir /home/aditya/Project/dev --pred-sif /home/aditya/Project/output.sif /home/aditya/Project/dev/PMID-*.txt'
print '....'
Example #10
def main(files, outdir, N, percent_lib, is_get_id, f_config, verbose=False):
    if os.path.isdir(outdir):
        sys.exit('## ERROR: "%s" already exists' % outdir)

    cparser = SafeConfigParser()
    cparser.read(f_config)
    verbose = True  # note: unconditionally overrides the verbose argument

    f_mirbasegff = cparser.get('mirbase', 'gff2')
    f_chromsizes = cparser.get('genome', 'chromsizes')
    f_repeats = cparser.get('genome', 'repeats')
    f_ensembl = cparser.get('genome', 'ensemblgtf')
    f_fasta = cparser.get('genome', 'fasta')
    d_phastcons = cparser.get('cons', 'phastcons')
    TRAP = cparser.get('tata', 'trap')
    f_psemmatrix = cparser.get('tata', 'psem')
    f_traincfg = cparser.get('configs', 'tcconfig')
    m_mirna = cparser.get('correlation', 'srnaseqmatrix')
    m_tss = cparser.get('correlation', 'cageseqmatrix')
    corrmethod = cparser.get('correlation', 'corrmethod')

    f_trainingset = os.path.join(outdir, 'TrainingSet.gff')
    outdir1 = f_trainingset + '_intermediates'

    ensure_dir(outdir, False)
    ensure_dir(outdir1, False)

    _files = glob.glob(files)

    ## creating auxiliary file for negative set
    f_fiveprimegff = '../data/hsa.five_prime.gff'
    if not os.path.exists(f_fiveprimegff):
        if verbose:
            print 'STATUS: creating "%s" auxiliary file...' % f_fiveprimegff
        extract_tss_from_ensembl(f_ensembl, f_fiveprimegff)

    ## create training set
    gff_ts_pos = os.path.join(outdir1, 'trainingset_pos.gff')
    gff_ts_neg = os.path.join(outdir1, 'trainingset_neg.gff')
    if verbose: print 'STATUS: creating positive candidate set...'
    create_positiveset(percent_lib, _files, f_mirbasegff, N, gff_ts_pos,
                       is_get_id)
    if verbose: print 'STATUS: creating negative candidate set...'
    create_negativeset(f_chromsizes, f_repeats, f_fiveprimegff, f_traincfg, N,
                       gff_ts_neg)

    shutil.move(os.path.join(outdir1, 'tc-norm_negSet'),
                os.path.join(outdir, 'tc-norm_negSet'))

    ## feature extraction: cpg, cons, tata (features.py)
    if verbose: print 'STATUS: extracting features cpg/cons/tata...'
    gff_1kbfeatures_pos = os.path.join(outdir1, 'features1kb_ts_pos.gff')
    gff_1kbfeatures_neg = os.path.join(outdir1, 'features1kb_ts_neg.gff')

    features.main(gff_ts_pos, outdir1, f_fasta, f_chromsizes, d_phastcons,
                  TRAP, f_psemmatrix, gff_1kbfeatures_pos)

    features.main(gff_ts_neg, outdir1, f_fasta, f_chromsizes, d_phastcons,
                  TRAP, f_psemmatrix, gff_1kbfeatures_neg)

    ## feature extraction: mirna_proximity
    if verbose: print 'STATUS: extracting features mirna_proximity...'
    gff_mirnaprox_pos = os.path.join(outdir1, 'featureMprox_ts_pos.gff')
    gff_mirnaprox_neg = os.path.join(outdir1, 'featureMprox_ts_neg.gff')
    mirna_proximity.main(gff_ts_pos, f_mirbasegff, gff_mirnaprox_pos)
    mirna_proximity.main(gff_ts_neg, f_mirbasegff, gff_mirnaprox_neg)

    gff_features_pos = os.path.join(outdir1, 'Features_ts_pos.gff')
    gff_features_neg = os.path.join(outdir1, 'Features_ts_neg.gff')
    gff_unify_features.main(gff_1kbfeatures_pos, gff_mirnaprox_pos,
                            'mirna_prox', '0', gff_features_pos, True)
    gff_unify_features.main(gff_1kbfeatures_neg, gff_mirnaprox_neg,
                            'mirna_prox', '0', gff_features_neg, True)

    ## create final training set ...
    ## where background must pass criteria: cpg <= 0.5 and cons <= 0.2 and tata <= 0.1 and mirna_prox == 0
    if verbose: print 'STATUS: creating final training set...'
    good_background = gff_features_neg + '_cpglt0.5-conslt0.2-tatalt0.1-mproxeq0.gff'
    with open(good_background, 'w') as out:
        with open(gff_features_neg) as f:
            for line in f:
                info = line.strip().split('\t')[7].split(';')
                cpg = float(get_value_from_keycolonvalue_list('cpg', info))
                cons = float(get_value_from_keycolonvalue_list('cons', info))
                tata = float(get_value_from_keycolonvalue_list('tata', info))
                mprx = float(
                    get_value_from_keycolonvalue_list('mirna_prox', info))

                if cpg <= 0.5 and cons <= 0.2 and tata <= 0.1 and mprx == 0:
                    out.write(line)

    wc = line_count(good_background)
    selectedlines = random.sample(range(1, wc + 1), N)

    with open(f_trainingset, 'w') as out:
        ## writing negative set
        for l in selectedlines:
            out.write(linecache.getline(good_background, l))

        ## writing positive set
        with open(gff_features_pos) as f:
            ## when the mirna_prox extraction feature was used,
            ## all pairs within 50kb upstream of a mirna were extracted
            ## -> a single tss could have many mirna;
            ## take the pair with min distance
            ## -> essentially the first entry
            pos_list = []
            for line in f:
                l = line.split('\t')
                pos = ','.join([l[0], l[3], l[4], l[6]])
                if not (pos in pos_list):
                    pos_list.append(pos)
                    out.write(line)

    if not (os.path.isfile(m_mirna) and os.path.isfile(m_tss)):
        return f_trainingset

    ## create final training set with feature:correlation of closest tss->miRNA ...
    if verbose:
        print 'STATUS: creating final training set with correlation of closest tss->miRNA...'
    f_trainingset2 = os.path.join(outdir, 'TrainingSet-corr.gff')
    m_back = glob.glob('%s/tc-norm_negSet/*tpm_rle.matrix' % outdir)[0]
    f_tcfilesinput = os.path.join(outdir, 'tc-norm_negSet', 'files.txt')

    feature_closest_corr(f_trainingset, f_mirbasegff, m_mirna, m_tss, m_back,
                         f_tcfilesinput, corrmethod, f_trainingset2)

    return f_trainingset2
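get_value_from_keycolonvalue_list is another project helper; a plausible sketch consistent with its use on 'key:value' attribute fields above (the '0' default is an assumption):

def get_value_from_keycolonvalue_list(key, kv_list):
    # Hypothetical reconstruction: return the value paired with `key`
    # in a list of 'key:value' strings, or '0' when the key is absent.
    for item in kv_list:
        k, _, v = item.partition(':')
        if k == key:
            return v
    return '0'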
Example #11
import features

print("Hello")

url = input("Please Enter the URL and press enter to proceed : ")

# url = "http://ebay.co.uk"

response = features.main(url)
print(response)