def make_listing_clusters():
    # for now
    # shape up features
    import features
    # make big and small features (stored in database);
    # really only need to make fancy features
    features.main()
    # cluster
    import listing_cluster
    listing_cluster.main()
import os
import time

import features
import nearest_loc

# `st` is expected to be a Pyrebase storage handle defined at module level
# (see the sketch below)


def stream_handler(message):
    print(message["path"])  # e.g. /-K7yGTTEp7O549EzTYtI
    print(message["data"])  # e.g. {'title': 'Pyrebase', 'body': 'etc...'}
    entry = message["path"]
    if message and message["data"] is not None:
        # skip entries whose path ends in '/Pm' (the original compared the
        # last three characters, reversed, against 'mP/')
        if len(entry) > 1 and not entry.endswith('/Pm'):
            newpath = r"C:\Users\HP\Desktop\%s" % entry
            if not os.path.exists(newpath):
                os.makedirs(newpath)
            time.sleep(2)
            st.child("uploads/air/394.jpg").download(
                r"C:\Users\HP\Desktop\%s\1.jpg" % entry)
            features.main()
            nearest_loc.main()
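# A minimal sketch of how stream_handler might be wired up with Pyrebase
# (the library hinted at in the comments above). The config values are
# placeholders and the node being streamed ("uploads") is an assumption;
# Pyrebase invokes the handler with a dict carrying "event", "path" and
# "data" for every change under the streamed node.
import pyrebase

config = {
    "apiKey": "<api-key>",
    "authDomain": "<project>.firebaseapp.com",
    "databaseURL": "https://<project>.firebaseio.com",
    "storageBucket": "<project>.appspot.com",
}
firebase = pyrebase.initialize_app(config)
db = firebase.database()
st = firebase.storage()  # the storage handle used inside stream_handler

my_stream = db.child("uploads").stream(stream_handler)
# ... later, to stop listening:
# my_stream.close()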
def main():
    url = sys.argv[1]
    features_test = features.main(url)
    clf = joblib.load('random_forest.pkl')
    pred = clf.predict(features_test)
    if int(pred[0]) == 1:
        print("This website is safe")       # originally: "Website ini aman"
    elif int(pred[0]) == -1:
        print("This website is not safe!")  # originally: "Website ini tidak aman!"
def main():
    url = sys.argv[1]
    features_test = features.main(url)
    clf = joblib.load('random_forest.pkl')
    pred = clf.predict(features_test)
    prob = clf.predict_proba(features_test)  # class probabilities (unused below)
    if int(pred[0]) == 1:
        print("This is a safe website.")
    elif int(pred[0]) == -1:
        print("This is a phishing website..!")
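# A hedged sketch of how the `random_forest.pkl` model loaded above might
# have been produced. The training file name, its layout (last column =
# label, 1 for safe / -1 for phishing) and the hyperparameters are all
# assumptions; only joblib and a random forest are implied by the code above.
import numpy as np
import joblib
from sklearn.ensemble import RandomForestClassifier

data = np.loadtxt('phishing_dataset.csv', delimiter=',')  # hypothetical file
X, y = data[:, :-1], data[:, -1]

clf = RandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X, y)
joblib.dump(clf, 'random_forest.pkl')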
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
###
import evaluation
import check_tests as ct
import features
###
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import roc_curve, auc

train = pd.read_csv('../data/training.csv', index_col='id')
subset = [1, 2, 3, 4, 5]
variables = train.columns[subset]

trained_model = features.main('rf', variables)
ct.agreement(trained_model, variables)
ct.correlation(trained_model, variables)
ct.weightedAuc(trained_model, variables, train)
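# roc_curve and auc are imported above but never used; a minimal sketch of
# the evaluation they would enable, assuming features.main('rf', ...)
# returns a fitted sklearn-style classifier and that `train` carries a
# binary label column (called 'label' here purely as a placeholder).
scores = trained_model.predict_proba(train[variables])[:, 1]
fpr, tpr, _ = roc_curve(train['label'], scores)
plt.plot(fpr, tpr, label='AUC = %.3f' % auc(fpr, tpr))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend()
plt.show()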
def extractFeatures_given_gff(config, gff_infile, outdir, has_mirna,
                              is_consider_corr):
    cparser = SafeConfigParser()
    cparser.read(config)
    tc_config = cparser.get('configs', 'tcconfig')
    m_mirna = cparser.get('correlation', 'srnaseqmatrix')
    f_fasta = cparser.get('genome', 'fasta')
    f_chromsizes = cparser.get('genome', 'chromsizes')
    d_phastcons = cparser.get('cons', 'phastcons')
    TRAP = cparser.get('tata', 'trap')
    f_psemmatrix = cparser.get('tata', 'psem')
    mirbase_gff2 = cparser.get('mirbase', 'gff2')
    corrmethod = cparser.get('correlation', 'corrmethod')

    ## PART1: tc normalization
    ## 1a. setup infile
    outdir_tc = os.path.join(outdir, 'tc-norm')
    f1_pos = os.path.join(outdir, 'f1_pos.txt')
    ensure_dir(outdir_tc)

    ## 1b. reformat infile so that it can be read by tc-quantify
    _reformat_infile_gff2tcnorm(gff_infile, f1_pos)

    ## 1c. run
    fo_bed = tc_normalization.main(tc_config, f1_pos, outdir_tc)

    ncount_dict = {}
    with open(fo_bed) as f:
        for line in f:
            l = line.strip().split('\t')
            try:
                _, chrom, start, _, stop, strand = re.split('[r:.,]', l[3])
                pos = '.'.join([chrom, start, stop, strand])
                ncount_dict[pos] = l[6]
            except ValueError:
                print '#[tcBedSpltErr]: %s' % line,

    ## 1d. setup outfile
    f_rle = re.sub('max_tpm.bed$', 'tpm_rle.matrix', fo_bed)

    tcparser = SafeConfigParser()
    tcparser.read(tc_config)
    f_ids = tcparser.get('tc_normalization', 'ids')

    ## PART2: compute cpg, cons, tata ...
    outdir_seqfeatures = os.path.join(outdir, 'seqfeatures/')
    ensure_dir(outdir_seqfeatures)

    gff1kb_infile = os.path.join(outdir_seqfeatures, 'infile_1kbseq.gff')
    gff_1kbfeatures = os.path.join(outdir_seqfeatures, 'features_1kbseq.gff')
    _reformat_tss_to_1kb(f1_pos, gff1kb_infile)
    features.main(gff1kb_infile, outdir_seqfeatures, f_fasta, f_chromsizes,
                  d_phastcons, TRAP, f_psemmatrix, gff_1kbfeatures)

    ## PART3: compute mprox ...
    outdir_tmp = os.path.join(outdir, 'intermediates')
    ensure_dir(outdir_tmp, False)
    gff_mproxfeatures = os.path.join(outdir_tmp, 'features_mprox.gff')
    gff_ufeat1 = os.path.join(outdir_tmp, 'features.1kb.mprox.gff')
    if has_mirna:
        _interpret_mprox(gff_infile, mirbase_gff2, gff_mproxfeatures)
    else:
        mirna_proximity.main(gff1kb_infile, mirbase_gff2, gff_mproxfeatures)
    gff_unify_features.main(gff_1kbfeatures, gff_mproxfeatures, 'mirna_prox',
                            '0', gff_ufeat1, True)

    ## PART4: compute corr
    if is_consider_corr:
        ## correlation setup:
        outdir_corr = os.path.join(outdir, 'corr')
        ensure_dir(outdir_corr, False)
        gff_mirna = os.path.join(outdir_corr, '4corr_mirna.gff')
        gff_tss = os.path.join(outdir_corr, '4corr_tss.gff')
        pair_pos = os.path.join(outdir_corr,
                                '4corrPair_row_pos_tss-mirna.txt')
        pair_sample = os.path.join(outdir_corr,
                                   '4corrPair_col_sample_CAGE-sRNAseq.txt')
        fo_corr = os.path.join(outdir_corr,
                               'features_correlation-%s.gff' % corrmethod)
        gff_ufeat2 = os.path.join(outdir_tmp, 'features.1kb.mprox.corr.gff')

        ## position pair:
        correlation._find_miRNA_pos(m_mirna, mirbase_gff2, gff_mirna)
        correlation._get_tss_pos(f1_pos, gff_tss)
        if has_mirna:
            _interpret_tss_mirna_pairings(gff_infile, gff_tss, gff_mirna,
                                          pair_pos)
        else:
            correlation._get_tss_mirna_pairings(gff_tss, gff_mirna, pair_pos)

        ## sample pair:
        srnaseq_index = correlation._index_srnaseq(m_mirna)
        cage_index = _index_tcnorm(f_ids)
        correlation._get_sample_pairings(cage_index, srnaseq_index,
                                         pair_sample)

        ## compute correlation:
        correlation._compute_correlation(pair_pos, pair_sample, f_rle,
                                         m_mirna, fo_corr, corrmethod, '.')
        gff_unify_features.main(gff_ufeat1, fo_corr, 'corr', '0', gff_ufeat2,
                                True)
        gff_ufeat = gff_ufeat2
    else:
        gff_ufeat = gff_ufeat1

    findex = _index_feat(gff_ufeat, has_mirna)

    ## PART5: start consolidating features ...
    gff_allfeatures = os.path.join(outdir, 'features.gff')
    with open(gff_allfeatures, 'w') as out:
        with open(gff_infile) as f:
            for l in f:
                chrom, _, _, start, stop, _, strand, _, mirna = \
                    l.strip().split('\t')
                mirna = mirna.lower()

                ## setting ids...
                tssid = '.'.join([chrom, start, stop, strand])

                ## getting info...
                try:
                    ncount = ncount_dict[tssid]
                except KeyError:
                    ncount = '0'

                if tssid in findex:
                    for n in findex[tssid]:
                        if has_mirna:
                            m, n = n.split(':')
                            if m != mirna:
                                continue
                        newline = linecache.getline(gff_ufeat, int(n))
                        newline = newline.split('\t')
                        newline[2] = mirna
                        newline[5] = ncount
                        out.write('\t'.join(newline))

    return gff_allfeatures
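# A minimal driver for extractFeatures_given_gff(); the config and input
# paths below are placeholders mirroring the function signature, nothing
# more.
if __name__ == '__main__':
    gff_out = extractFeatures_given_gff('promirna.ini', 'input_tss.gff',
                                        'outdir', has_mirna=False,
                                        is_consider_corr=True)
    print('wrote: %s' % gff_out)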
import os

import numpy as np
from numpy import loadtxt
from collections import defaultdict

import gengraph
import features


def gensif(temp, f):
    infile2 = os.path.dirname(os.path.abspath(__file__)) + '/csvFiles/pred.csv'
    with open(infile2, 'w') as fh1:
        for k in temp:
            fh1.write('%d\n' % int(k))

    gengraph.main(f)
    features.main()

    tex = loadtxt('/home/aditya/Project/csvFiles/trainNLP.csv', delimiter=',')
    tex_dev = loadtxt('/home/aditya/Project/csvFiles/devNLP.csv', delimiter=',')
    tex_test = loadtxt('/home/aditya/Project/csvFiles/testNLP.csv',
                       delimiter=',')

    vals = tex.shape
    vals_dev = tex_dev.shape
    vals_test = tex_test.shape

    # last column holds the label; the remaining columns are features
    ty = tex[:, vals[1] - 1]
    tx = tex[:, 0:vals[1] - 2]
    ty_dev = tex_dev[:, vals_dev[1] - 1]
    tx_dev = tex_dev[:, 0:vals_dev[1] - 2]
    tx_test = tex_test[:, 0:vals_test[1] - 1]

    # clf = svm.SVC()
    # clf.fit(tx, ty)
    newtext_train = ('./GRN.py --a1-dir /home/aditya/Project/train '
                     '--a2-dir /home/aditya/Project/train '
                     '--pred-sif /home/aditya/Project/output_train.sif '
                     '/home/aditya/Project/train/PMID-*.txt')
    newtext_dev = ('./GRN.py --a1-dir /home/aditya/Project/dev '
                   '--a2-dir /home/aditya/Project/dev '
                   '--pred-sif /home/aditya/Project/output_dev.sif '
                   '/home/aditya/Project/dev/PMID-*.txt')
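# The commented-out classifier above, completed as a hedged sketch: if
# placed inside gensif() after the matrices are loaded, it would fit an
# SVM on the training split and score the dev split. The import and the
# choice of SVC mirror the commented-out lines only.
from sklearn import svm

clf = svm.SVC()
clf.fit(tx, ty)
print('dev accuracy: %.3f' % clf.score(tx_dev, ty_dev))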
def main(f_config, gff_cage, is_gff, outdir, make_plots):
    cparser = SafeConfigParser()
    cparser.read(f_config)

    in_bname = os.path.basename(gff_cage)
    if outdir is None:
        outdir = 'promi2_outdir_' + in_bname + '_' + random_string(6)
    ensure_dir(outdir, False)

    f_param = cparser.get('promi2', 'params')
    listoffeatures = cparser.get('promi2', 'features')
    listoffeatures = listoffeatures.split(',')
    if 'corr' in listoffeatures:
        is_consider_corr = True
        corrmethod = cparser.get('correlation', 'corrmethod')
    else:
        is_consider_corr = False

    ## PART1: Feature extraction
    if not is_gff:
        ## feature extraction: cpg, cons, tata (features.py)
        outdir_seqfeatures = os.path.join(outdir, 'seqfeatures')
        ensure_dir(outdir_seqfeatures, False)
        gff_1kbfeatures = os.path.join(outdir_seqfeatures,
                                       'features_1kbseq.gff')

        f_fasta = cparser.get('genome', 'fasta')
        f_chromsizes = cparser.get('genome', 'chromsizes')
        d_phastcons = cparser.get('cons', 'phastcons')
        TRAP = cparser.get('tata', 'trap')
        f_psemmatrix = cparser.get('tata', 'psem')

        features.main(gff_cage, outdir_seqfeatures, f_fasta, f_chromsizes,
                      d_phastcons, TRAP, f_psemmatrix, gff_1kbfeatures)

        ## feature extraction: mirna_proximity (mirna_proximity.py)
        outdir_mprox = os.path.join(outdir, 'mprox')
        ensure_dir(outdir_mprox, False)
        gff_mirnaprox = os.path.join(outdir_mprox, 'features_mirnaprox.gff')

        gff_mirna = cparser.get('mirbase', 'gff2')
        mirna_proximity.main(gff_cage, gff_mirna, gff_mirnaprox)

        ## merge extracted features (gff_unify_features.py)
        gff_features = os.path.join(outdir, 'Features.1kb.mprox.' + in_bname)
        gff_unify_features.main(gff_1kbfeatures, gff_mirnaprox, 'mirna_prox',
                                '0', gff_features)

        if is_consider_corr:
            ## merge extracted features (gff_unify_features.py)
            ## after computing correlation
            gff_features_corr = os.path.join(
                outdir, 'Features.1kb.mprox.%s.%s' % (corrmethod, in_bname))
            outdir_corr = os.path.join(outdir, 'corr')

            m_mirna = cparser.get('correlation', 'srnaseqmatrix')
            m_tss = cparser.get('correlation', 'cageseqmatrix')

            gff_corr = correlation.main(gff_mirna, m_mirna, m_tss,
                                        corrmethod, outdir_corr)
            gff_unify_features.main(gff_features, gff_corr, 'corr', '0',
                                    gff_features_corr)
            gff_allfeatures = gff_features_corr
        else:
            gff_allfeatures = gff_features
    else:
        gff_allfeatures = gff_cage
        with open(gff_allfeatures) as f:
            l = f.readline().split('\t')
            if not (':' in l[7]):
                sys.exit('ERROR: this is not a features.gff formatted file')

    ## PART2: extract parameters & run promirna
    f_prediction = os.path.join(outdir, 'Predictions.' + in_bname + '.txt')
    print 'COMPUTING: "%s"...' % f_prediction
    promi2(f_param, listoffeatures, gff_allfeatures, f_prediction)

    ## PART3: plots
    if make_plots:
        plotdir = os.path.join(outdir, 'plots')
        ensure_dir(plotdir, False)
        plots.main(f_prediction, plotdir, f_config)
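# A plausible skeleton for f_config, inferred from the cparser.get() calls
# in main() above; the section and option names match the code, every
# value is a placeholder.
EXAMPLE_PROMI2_CONFIG = """
[promi2]
params = /path/to/promi2_params.txt
features = cpg,cons,tata,mirna_prox,corr

[genome]
fasta = /path/to/genome.fa
chromsizes = /path/to/chrom.sizes

[cons]
phastcons = /path/to/phastcons/

[tata]
trap = /path/to/TRAP
psem = /path/to/tata.psem

[mirbase]
gff2 = /path/to/mirbase_hsa.gff2

[correlation]
srnaseqmatrix = /path/to/srnaseq.matrix
cageseqmatrix = /path/to/cageseq.matrix
corrmethod = pearson
"""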
import os
import re

from numpy import loadtxt

import gengraph
import features


def gensif(temp):
    infile2 = os.path.dirname(os.path.abspath(__file__)) + '/csvFiles/pred.csv'
    with open(infile2, 'w') as fh1:
        for k in temp:
            fh1.write('%d\n' % int(k))

    gengraph.main()
    # lb = preprocessing.L   (truncated in the original; `preprocessing` is
    # never imported and `lb` is never used)
    features.main()

    tex = loadtxt('/home/aditya/Project/csvFiles/trainNLP.csv', delimiter=',')
    tex_dev = loadtxt('/home/aditya/Project/csvFiles/devNLP.csv', delimiter=',')
    tex_test = loadtxt('/home/aditya/Project/csvFiles/testNLP.csv',
                       delimiter=',')

    vals = tex.shape
    vals_dev = tex_dev.shape
    vals_test = tex_test.shape

    # last column holds the label; the remaining columns are features
    ty = tex[:, vals[1] - 1]
    tx = tex[:, 0:vals[1] - 2]
    ty_dev = tex_dev[:, vals_dev[1] - 1]
    tx_dev = tex_dev[:, 0:vals_dev[1] - 2]
    tx_test = tex_test[:, 0:vals_test[1] - 1]

    # clf = svm.SVC()
    # clf.fit(tx, ty)
    newtext = ('./GRN.py --a1-dir /home/aditya/Project/dev '
               '--a2-dir /home/aditya/Project/dev '
               '--pred-sif /home/aditya/Project/output.sif '
               '/home/aditya/Project/dev/PMID-*.txt')
    print('....')
def main(files, outdir, N, percent_lib, is_get_id, f_config, verbose=False):
    if os.path.isdir(outdir):
        sys.exit('## ERROR: "%s" already exists' % outdir)

    cparser = SafeConfigParser()
    cparser.read(f_config)

    verbose = True  # note: forces verbose mode, overriding the parameter

    f_mirbasegff = cparser.get('mirbase', 'gff2')
    f_chromsizes = cparser.get('genome', 'chromsizes')
    f_repeats = cparser.get('genome', 'repeats')
    f_ensembl = cparser.get('genome', 'ensemblgtf')
    f_fasta = cparser.get('genome', 'fasta')
    d_phastcons = cparser.get('cons', 'phastcons')
    TRAP = cparser.get('tata', 'trap')
    f_psemmatrix = cparser.get('tata', 'psem')
    f_traincfg = cparser.get('configs', 'tcconfig')
    m_mirna = cparser.get('correlation', 'srnaseqmatrix')
    m_tss = cparser.get('correlation', 'cageseqmatrix')
    corrmethod = cparser.get('correlation', 'corrmethod')

    f_trainingset = os.path.join(outdir, 'TrainingSet.gff')
    outdir1 = f_trainingset + '_intermediates'
    ensure_dir(outdir, False)
    ensure_dir(outdir1, False)

    _files = glob.glob(files)

    ## creating auxiliary file for negative set
    f_fiveprimegff = '../data/hsa.five_prime.gff'
    if not os.path.exists(f_fiveprimegff):
        if verbose:
            print 'STATUS: creating "%s" auxiliary file...' % f_fiveprimegff
        extract_tss_from_ensembl(f_ensembl, f_fiveprimegff)

    ## create training set
    gff_ts_pos = os.path.join(outdir1, 'trainingset_pos.gff')
    gff_ts_neg = os.path.join(outdir1, 'trainingset_neg.gff')
    if verbose:
        print 'STATUS: creating positive candidate set...'
    create_positiveset(percent_lib, _files, f_mirbasegff, N, gff_ts_pos,
                       is_get_id)
    if verbose:
        print 'STATUS: creating negative candidate set...'
    create_negativeset(f_chromsizes, f_repeats, f_fiveprimegff, f_traincfg,
                       N, gff_ts_neg)
    shutil.move(os.path.join(outdir1, 'tc-norm_negSet'),
                os.path.join(outdir, 'tc-norm_negSet'))

    ## feature extraction: cpg, cons, tata (features.py)
    if verbose:
        print 'STATUS: extracting features cpg/cons/tata...'
    gff_1kbfeatures_pos = os.path.join(outdir1, 'features1kb_ts_pos.gff')
    gff_1kbfeatures_neg = os.path.join(outdir1, 'features1kb_ts_neg.gff')
    features.main(gff_ts_pos, outdir1, f_fasta, f_chromsizes, d_phastcons,
                  TRAP, f_psemmatrix, gff_1kbfeatures_pos)
    features.main(gff_ts_neg, outdir1, f_fasta, f_chromsizes, d_phastcons,
                  TRAP, f_psemmatrix, gff_1kbfeatures_neg)

    ## feature extraction: mirna_proximity
    if verbose:
        print 'STATUS: extracting features mirna_proximity...'
    gff_mirnaprox_pos = os.path.join(outdir1, 'featureMprox_ts_pos.gff')
    gff_mirnaprox_neg = os.path.join(outdir1, 'featureMprox_ts_neg.gff')
    mirna_proximity.main(gff_ts_pos, f_mirbasegff, gff_mirnaprox_pos)
    mirna_proximity.main(gff_ts_neg, f_mirbasegff, gff_mirnaprox_neg)

    gff_features_pos = os.path.join(outdir1, 'Features_ts_pos.gff')
    gff_features_neg = os.path.join(outdir1, 'Features_ts_neg.gff')
    gff_unify_features.main(gff_1kbfeatures_pos, gff_mirnaprox_pos,
                            'mirna_prox', '0', gff_features_pos, True)
    gff_unify_features.main(gff_1kbfeatures_neg, gff_mirnaprox_neg,
                            'mirna_prox', '0', gff_features_neg, True)

    ## create final training set ...
    ## where background must pass criteria:
    ## cpg <= 0.5 and cons <= 0.2 and tata <= 0.1 and mirna_prox == 0
    if verbose:
        print 'STATUS: creating final training set...'
    good_background = (gff_features_neg +
                       '_cpglt0.5-conslt0.2-tatalt0.1-mproxeq0.gff')
    with open(good_background, 'w') as out:
        with open(gff_features_neg) as f:
            for line in f:
                info = line.strip().split('\t')[7].split(';')
                cpg = float(get_value_from_keycolonvalue_list('cpg', info))
                cons = float(get_value_from_keycolonvalue_list('cons', info))
                tata = float(get_value_from_keycolonvalue_list('tata', info))
                mprx = float(
                    get_value_from_keycolonvalue_list('mirna_prox', info))
                if cpg <= 0.5 and cons <= 0.2 and tata <= 0.1 and mprx == 0:
                    out.write(line)

    wc = line_count(good_background)
    selectedlines = random.sample(range(1, wc + 1), N)

    with open(f_trainingset, 'w') as out:
        ## writing negative set
        for l in selectedlines:
            out.write(linecache.getline(good_background, l))

        ## writing positive set
        with open(gff_features_pos) as f:
            ## when the mirna_prox feature was extracted, all pairs within
            ## 50kb upstream of a mirna were collected
            ## -> a single tss could have many mirna
            ## take the pair with min distance
            ## -> essentially the first entry
            pos_list = []
            for line in f:
                l = line.split('\t')
                pos = ','.join([l[0], l[3], l[4], l[6]])
                if not (pos in pos_list):
                    pos_list.append(pos)
                    out.write(line)

    if not (os.path.isfile(m_mirna) and os.path.isfile(m_tss)):
        return f_trainingset

    ## create final training set with feature:
    ## correlation of closest tss->miRNA ...
    if verbose:
        print ('STATUS: creating final training set with correlation '
               'of closest tss->miRNA...')

    f_trainingset2 = os.path.join(outdir, 'TrainingSet-corr.gff')
    m_back = glob.glob('%s/tc-norm_negSet/*tpm_rle.matrix' % outdir)[0]
    f_tcfilesinput = os.path.join(outdir, 'tc-norm_negSet', 'files.txt')
    feature_closest_corr(f_trainingset, f_mirbasegff, m_mirna, m_tss, m_back,
                         f_tcfilesinput, corrmethod, f_trainingset2)

    return f_trainingset2
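# One plausible implementation of the get_value_from_keycolonvalue_list()
# helper used above, assuming the GFF attribute field is a semicolon-
# separated list of 'key:value' strings (as the split(';') above suggests);
# the real helper lives elsewhere and may differ, e.g. in its default for
# missing keys.
def get_value_from_keycolonvalue_list(key, keycolonvalue_list):
    for item in keycolonvalue_list:
        k, _, v = item.partition(':')
        if k.strip() == key:
            return v.strip()
    return '0'  # assumed default so the float() calls above still succeed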
import features

print("Hello")
url = input("Please Enter the URL and press enter to proceed : ")
# url = "http://ebay.co.uk"
response = features.main(url)
print(response)