def analyse_all(filename_pattern):
    import glob
    from analyse import analyse
    datafiles = glob.glob(filename_pattern)
    for filename in datafiles:
        print filename
        analyse(filename)
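
A minimal, self-contained sketch of the same glob-then-analyse loop; the stubbed analyse() and the 'data/*.csv' pattern are hypothetical, not from the example above.

import glob

def analyse(filename):
    # stand-in for the real analyse(); just report which file would be processed
    print("analysing %s" % filename)

def analyse_all(filename_pattern):
    for filename in glob.glob(filename_pattern):
        print(filename)
        analyse(filename)

analyse_all('data/*.csv')
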
def process(clean_word):
    w, new = word.get_word(clean_word)
    if w.completed():
        logging.info('Processing COMPLETED for %s' % clean_word)
        return

    state = status.get_latest_state(clean_word)

    logging.info('PROCESSING %s [%i]' % (w.word(), state))

    # Dispatch word to appropriate processing stage.
    if state < 10:
        analyse.analyse(w, state)
    elif state < 20:
        generate.generate(w, state)
    else:
        # Completion state!
        state = 808


    # Persist latest word payload if needed.
    w.persist_payload()

    # Queue up next processing step if required.
    if not w.completed():
        deferred.defer(process, clean_word)
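
A toy illustration of the dispatch-by-state pattern above: each stage advances a numeric state until the completion marker is reached. Only the 10/20 thresholds and the 808 marker come from the example; the stub stages and increments are made up.

def run_stage(state):
    # stand-ins for analyse.analyse / generate.generate; each stage advances the state
    if state < 10:
        return state + 5       # "analyse" stage
    elif state < 20:
        return state + 10      # "generate" stage
    return 808                 # completion marker

state = 0
while state != 808:
    state = run_stage(state)
    print("state -> %d" % state)
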
Example #5
def archive(args):
    realoutput = ''
    output = ''
    inputs = []
    uncompiled = []
    skipping = True
    for arg in args[1:]:
        if len(realoutput) == 0 and (arg[0] == '-' or len(arg) == 1):
            continue
        elif (len(realoutput) == 0):
            realoutput = os.path.abspath(arg)
            output = callconfig.cachefile(realoutput)
        else:
            inputs.append(arg)

    if not len(inputs):
        return

    makecachedir(output)
    print "callcatcher - detecting archiving:"
    print "\tautojoining", \
     realoutput, "from\n\t", inputs
    combine.combine(output, inputs)
    print "callcatcher - dump currently unused:"
    print "\tUse \"callanalyse\" to manually analyse a set of compiler output files"
    print "\tautoanalysing", realoutput
    print "\tCurrently unused functions are..."
    analyse.analyse(output, "\t\t")
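
A condensed sketch of the argument classification above: the first non-flag argument becomes the archive output and everything after it is collected as an input. split_archive_args is a hypothetical helper, not part of callcatcher.

import os

def split_archive_args(args):
    realoutput = ''
    inputs = []
    for arg in args[1:]:
        if not realoutput and (arg.startswith('-') or len(arg) == 1):
            continue            # dash or single-character mode flags are skipped until the output is seen
        elif not realoutput:
            realoutput = os.path.abspath(arg)
        else:
            inputs.append(arg)
    return realoutput, inputs

print(split_archive_args(['ar', '-rcs', 'libfoo.a', 'a.o', 'b.o']))
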
Example #6
def graph():
    fig = Figure()
    global ListPart
    l = 0
    k = 0
    j = 0

    if str(interface.getvar(name="crowd_type")) == "Hétérogène":
        for i in range(0, len(ListPart)):
            if ListPart[i].name == "Enfant":
                k = k + 1
            if ListPart[i].name == "Adulte":
                j = j + 1
            if ListPart[i].name == "Ancien":
                l = l + 1
        print(k)
        t = ("Enfant", "Adulte", "Ancien")
        Y = (k, j, l)
        fig.add_subplot(111).plot(t, Y)
        canvas = FigureCanvasTkAgg(fig, master=Simulation)  # A tk.DrawingArea.
        canvas.draw()
        canvas.get_tk_widget().grid(row=1, column=2)
    else:
        # nothing to plot in this case: the population is homogeneous
        pass

    analyse()
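
For the head-count above, collections.Counter gives the same tallies in one pass. This is just an alternative sketch; the Participant class is a minimal hypothetical stand-in for the ListPart entries.

from collections import Counter

class Participant(object):
    # minimal stand-in for the objects stored in ListPart
    def __init__(self, name):
        self.name = name

ListPart = [Participant("Enfant"), Participant("Adulte"), Participant("Enfant")]
counts = Counter(p.name for p in ListPart)
t = ("Enfant", "Adulte", "Ancien")
Y = tuple(counts.get(name, 0) for name in t)
print(Y)   # (2, 1, 0)
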
Example #7
def process(line):
    # tokenise line
    # run POS tagger on it
    # convert to conllu
    # extract useful parts
    pos_tagged = run_irishfst(line)
    conll = irishfst_output_to_conll(pos_tagged)
    deps = parse_dependencies(conll)
    analyse.analyse(deps)
def main():
    list_of_1100_links = le.get_all_1000_links(
        'https://www.goodreads.com/list/show/6.Best_Books_of_the_20th_Century?page='
    )
    df_all = s_s.create_df_and_save_as_csv(list_of_1100_links)

    #### other version
    #df_all = s_s.create_df_and_save_as_csv(le.get_all_1000_links('https://www.goodreads.com/list/show/6.Best_Books_of_the_20th_Century?page='))

    df_cleaned = preprocessing(df_all)
    analyse(df_cleaned, df_all)
Example #9
def main():
    """
    Main function of the script
    """
    args = getArgs()

    # User specifies motif size range instead of giving a repeats file
    if args.repeats is None:
        min_motif_size = args.min_motif_size
        max_motif_size = args.max_motif_size
        args.repeats = generate_repeats(min_motif_size, max_motif_size)

    # User specifies minimum length
    if args.min_length:
        getSSRNative(args)

    # User specific minimum number of units
    elif args.min_units:
        unit_cutoff = dict()
        try:
            args.min_units = int(args.min_units)
            unit_cutoff[0] = args.min_units
        except ValueError:
            try:
                with open(args.min_units, 'r') as unitsIn:
                    for line in unitsIn:
                        L = line.strip().split()
                        try:
                            L[0] = int(L[0])
                            L[1] = int(L[1])
                            if L[1] == 1:
                                print(
                                    'Warning: Repeat unit of 1 used for size %d.'
                                    % (L[0]),
                                    file=sys.stderr)
                            unit_cutoff[L[0]] = L[1]
                        except ValueError:
                            sys.exit(
                                'Invalid file format given for minimum units. Refer to help for more details'
                            )
            except FileNotFoundError:
                sys.exit(
                    'Units file specified is not found. Please provide a valid file'
                )
        getSSR_units(args, unit_cutoff)

    # Default settings
    elif args.min_length is None and args.min_units is None:
        args.min_length = 12
        getSSRNative(args)

    # Specifies to generate a HTML report
    if args.analyse:
        analyse(args)
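
The min_units file parsed above is expected to hold two whitespace-separated integer columns per line: motif size, then the minimum number of repeat units for that size. A small illustration with hypothetical values:

example_units = """\
2 6
3 4
4 3
"""

unit_cutoff = {}
for line in example_units.strip().splitlines():
    size, units = line.split()
    unit_cutoff[int(size)] = int(units)
print(unit_cutoff)   # {2: 6, 3: 4, 4: 3}
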
Example #10
def link(args):
    realoutput = abslinkoutput(args)
    output = callconfig.cachefile(realoutput)
    inputs = []
    fakeargs = [
        args[0],
    ]
    uncompiled = []
    skip = False
    for arg in args[1:]:
        if skip:
            skip = False
            continue
        if arg[0] == '-' and len(arg) > 1 and arg[1] != 'o':
            if arg[1] == 'l':
                print 'linking against lib' + arg[2:] + '[.so|.a]'
            fakeargs.append(arg)
        elif arg == '-o':
            skip = True
        else:
            name, suffix = os.path.splitext(arg)
            if suffix == '.c' or suffix == '.cc' \
             or suffix == '.cp' or suffix == '.cxx' \
             or suffix == '.cpp' or suffix == '.CPP' \
             or suffix == '.c++' or suffix == '.C' \
             or suffix == '.s':
                inputs.append(name + '.o')
                uncompiled.append(arg)
            else:
                inputs.append(arg)

    if len(uncompiled):
        print 'callcatcher - linkline contains source files, forcing',\
         'compile of:'
        print '\t', uncompiled
        fakeargs.append('-c')
        for uncompile in uncompiled:
            compileline = fakeargs[:]  # copy, so each source gets its own compile line
            compileline.append(uncompile)
            compile(compileline)

    if not len(inputs):
        return

    makecachedir(output)
    print "callcatcher - detecting link:"
    print "\tautojoining", \
     realoutput, "from\n\t", inputs
    combine.combine(output, inputs)
    print "callcatcher - dump currently unused:"
    print "\tUse \"callanalyse\" to manually analyse a set of compiler output files"
    print "\tautoanalysing", realoutput
    print "\tCurrently unused functions are..."
    analyse.analyse(output, "\t\t")
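
The suffix chain above amounts to a membership test against a fixed set of source-file extensions; a compact equivalent sketch (is_source is a hypothetical helper, the extensions are exactly the ones listed above):

import os

SOURCE_SUFFIXES = {'.c', '.cc', '.cp', '.cxx', '.cpp', '.CPP', '.c++', '.C', '.s'}

def is_source(path):
    return os.path.splitext(path)[1] in SOURCE_SUFFIXES

print(is_source('foo.cpp'))   # True
print(is_source('foo.o'))     # False
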
Example #11
def monitor():

    monitoring = True
    while (monitoring):
        count = (randint(1, 100))
        if (count >= 3):
            print("Good value: {}".format(count))
        else:
            print("Bad value: {}".format(count))
            analyse()
            monitoring = False
Example #13
def do_the_business():
    # open logfile
    with open(logfile_name,'w') as logfile:

        # some details
        broadcast(logfile,"File list contains %d files"%len(file_list))

        # delete the database
        if do_delete_db:
            os.remove(dbfile_name)

        # analysis stage
        if do_analyse:
            start = datetime.now()
            analyse.analyse(file_list=file_list,dbfile_name=dbfile_name,logfile=logfile,use_multiprocessing=use_multiprocessing,rel_judgment_dir=rel_judgment_dir)
            elapsed = datetime.now() - start
            broadcast(logfile,"Analyse phase took %s"%elapsed)

        # crossreference stage
        if do_crossreference:
            start = datetime.now()
            crossreference.crossreference(file_list=file_list,dbfile_name=dbfile_name,logfile=logfile,use_multiprocessing=use_multiprocessing)
            elapsed = datetime.now() - start
            broadcast(logfile,"Crossreference phase took %s"%elapsed)

        # convert stage
        if do_convert:
            conversion_start = time.time()
            start = datetime.now()
            convert.convert(file_list=file_list,dbfile_name=dbfile_name,logfile=logfile,public_html_dir=public_html_dir,use_multiprocessing=use_multiprocessing,do_legislation=do_legislation)
            elapsed = datetime.now() - start
            broadcast(logfile,"Convert phase took %s"%elapsed)
            if do_delete_html:
                delete_html.delete_html(conversion_start,output_dir)

        # disambiguation stage
        if do_disambiguation:
            disambiguation_start = time.time()
            start = datetime.now()
            disambiguation.disambiguation(file_list=file_list,dbfile_name=dbfile_name,logfile=logfile,output_dir=output_dir,use_multiprocessing=use_multiprocessing)
            elapsed = datetime.now() - start
            broadcast(logfile,"Disambiguation phase took %s"%elapsed)

        # index stage
        if do_index:
            start = datetime.now()
            indexes.make_indexes(dbfile_name=dbfile_name,logfile=logfile,output_dir=output_dir,use_multiprocessing=use_multiprocessing)
            elapsed = datetime.now() - start
            broadcast(logfile,"Index phase took %s"%elapsed)
Example #14
def run(dataset_path, results_path, parser_names, limit):
    parsers = [PARSERS[name] for name in parser_names]

    logger.info("Starting experiment run - reading dataset")
    with open(dataset_path) as dataset_file:
        dataset = [json.loads(row) for row in dataset_file if len(row.strip()) > 0]

    experiment = Experiment(limit)
    
    logger.info("Running experiment")
    results = experiment.run(dataset, parsers)
    output_results(results, results_path)

    logger.info("Experiment complete")
    print analyse(results)
Example #15
 def formatLineBreaks(self,page_info):
     """ Consolidate code for creating line breaks here """
     # determine the 'normal' line break
     gaps = page_info['gaps_analysis']
     smallest_average_gap = (0,0)
     largest_average_gap = (0,0)
     most_frequent_gap = (0,0)
     for i in gaps: # loop through the gap dictionaries by key
         if gaps[i]['n'] > most_frequent_gap[1]:
             most_frequent_gap = (i, gaps[i]['n']) # taking this to be 'normal'
     lower = gaps[most_frequent_gap[0]]['min'] # lower boundary
     upper = gaps[most_frequent_gap[0]]['max'] # upper boundary
     previous_y = 0
     string = ""
     for line in page_info['stripped_text']:
         coords = analyse().getCoordinates(line)
         if not  coords:
             string = string+line
         else:
             top_y = int(coords[1])
             if previous_y != 0: # skip the first iteration
                 whitespace = int(top_y-previous_y)
                 if whitespace >= lower and whitespace <= upper:
                     string = string+"<br/>%s"%line
                 elif whitespace > upper:
                     string = string+"<br/><br/>%s"%line
                 else:
                     string = string+line # not quite sure what this does
             previous_y = int(coords[3]) # our bottom y is now 'previous'
     return string
Example #16
def hello_world():
    name = request.args.get('cd-name')
    text = request.args.get('cd-textarea')
    if name is None and text is None:
        return render_template('index.html')
    else:
        kind,tags=analyse(name,text)
        return render_template('index.html',kind="类别:"+kind,tags="标签:"+tags)
Example #17
def main():
    
    if len(sys.argv) < 3:
        print 'no data! Usage:'
        print 'python run.py <.xlsx document path> <sheet name> <no. of bins>'
        return
    
    fname = sys.argv[1]
    sname = sys.argv[2]
    
    try:
        nbins = int(sys.argv[3])
    except (IndexError, ValueError):
        print('\nno binning specified, using Freedman-Diaconis automatic bin optimisation\n') 
        nbins = None

    ana.analyse(fname, sname, nbins)
Example #18
 def test_all(self):
     variables = ['asdf', 'hijkl']
     results = analyse(variables)
     self.assertEqual(
         results['highlights']['longest'], {'hijkl': 5})
     self.assertEqual(
         results['stats']['longest_10'],
         [{'hijkl': 5}, {'asdf': 4}])
Example #19
def scrape_from_file(filename):
    """ The main method which will ask for a file to read from, read it, analyse it and store it.
    (Using other methods)"""

    print "Filename: " + filename

    # Global variables
    global SOLR_SERVER
    global CONFIG

    # What file do you want to read from?
    # file_path = getFile()
    file = open(filename, "r")

    # Set the read variables:
    current_user = ""
    tweet_content = []
    in_tweet = False

    # Setup the Solr server variable
    solr_server = CONFIG.get_solr_server
    sh = solr_server if isinstance(solr_server, StorageHandler) else StorageHandler(solr_server)

    # Start reading the file line by line; readline() is used (rather than
    # iterating over the file object) so the username line that follows a
    # NEW_USER marker can also be read inside the loop
    text = file.readline()
    while text:
        text = text.rstrip('\n')

        if not "TwitterHelp.get_all_statuses():" in text:

            # Look for a user and store the username in a variable:
            if text == "#####_NEW_USER_#####":
                current_user = file.readline()

                # Look for a new Tweet and read till new_user or new_tweet.
            elif text == "#####_NEW_TWEET_#####":
                in_tweet = True

            # Look for the end of a Tweet
            elif text == "#####_END_OF_TWEET_#####":
                in_tweet = False
                if tweet_content != []:
                    # Analyse if there's any content
                    (lovekeywords, hatekeywords) = addalyse(filter_analysis(analyse(tweet_content)))

                    # Store into Solr
                    # parameter 4 = 1, update everything on the next update
                    # parameter 5 = 0, full update on next update
                    sh.add_profile(current_user, lovekeywords, hatekeywords, 1, 0)

                    # Debug print
                    print "Username: "******" has the following content:\n" + tweet_content
                    print "\n\n The following lovekeywords were found: \n" + lovekeywords
                    print "\n\n The following hatekeywords were found: \n" + hatekeywords

            # Store the content of a Tweet.
            elif in_tweet:
                if text != "":
                    tweet_content.append(text)

        # read the next line for the while-loop above
        text = file.readline()
Example #20
	def doAnalyse(self, imgName):
		imgFullPath = str(self._ui.txtPath.text()) + "/" + imgName
		an = analyse.analyse()
		an.analyseImage(imgFullPath, self.coords[0], self.coords[1], self.coords[2], self.coords[3])

		tmpPath = os.path.dirname(os.path.abspath(__file__)) + "/tmp/"

		self.setImageForControl(self._ui.lblSelect, tmpPath + "line.png")
		self.setImageForControl(self._ui.lblGraph, tmpPath  + "graph.png")
def compareQuery():
    term1 = request.form.get('twitter_query1', None)
    term2 = request.form.get('twitter_query2', None)
    
    country = request.form.get('countryDataset', 'global')
        
    if len(term1) == 0 and len(term2) == 0:
        return compare_err("You must add a search query in at least one of the input fields")
    
    if len(term1) != 0:
        result1, err = analyse(term1, country)    
        if err != None:
            flash("Analysing fewer than 20 tweets will lead to less accurate results. Only "+str(result1.tweetsetInfo.tweet_count)+" tweets analysed for "+str(result1.tweetsetInfo.term))
        
        if result1 == "invalidSearchQuery":
            return compare_err(term1+" is not a valid Twitter hashtag or user handle, please try again")
        elif result1 == "noHashorAt":
            return compare_err("You must enter a #tag or @user  in the first input field, please try again")                        
        elif result1 == "noTweetsFound":
            return compare_err("No tweets found for the query "+term1+", please try again")
        resultlist[0] = result1
       
    if len(term2) != 0: 
        result2, err = analyse(term2, country)
        if err != None:
            flash("Analysing fewer than 20 tweets will lead to less accurate results. Only "+str(result2.tweetsetInfo.tweet_count)+" tweets analysed for "+str(result2.tweetsetInfo.term))
        
        if result2 == "invalidSearchQuery":
            return compare_err(term2+" is not a valid Twitter hashtag or user handle, please try again")
        elif result2 == "noHashorAt":
            return compare_err("You must enter a #tag or @user in input  second input field, please try again")
        elif result2 == "noTweetsFound":
            return compare_err("No tweets found for this query "+term2+", please try again")
        resultlist[1] = result2
    
    compare = compare_results(resultlist[0], resultlist[1], country)

        
    return render_template('tabs/compare_tweets.html', resultlist=resultlist, compare=compare)   
Example #22
def snippet_to_results(snippet):
    fd, path = tempfile.mkstemp('.js')
    results = None

    # need to do this to flush the file
    with open(path, 'w') as f:
        f.write(snippet)

    with open(path, 'r') as f:
        variables = get_var_names(fullpath=path)
        # finally get interest statistics
        results = analyse(variables)
    return results
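
A sketch of the same write-then-reopen flow using tempfile.NamedTemporaryFile with delete=False (the file is flushed and closed when the with-block exits, and is simply left on disk here); get_var_names is replaced by a trivial placeholder, so this only illustrates the temp-file handling.

import tempfile

def snippet_to_results_sketch(snippet):
    with tempfile.NamedTemporaryFile('w', suffix='.js', delete=False) as f:
        f.write(snippet)
        path = f.name
    with open(path) as f:
        variables = f.read().split()   # placeholder for get_var_names(fullpath=path)
    return {'n_variables': len(variables)}

print(snippet_to_results_sketch('var x = 1;'))   # {'n_variables': 4}
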
Example #23
def super_analyze(strings):
    custom_gap = '-'
    analyzed = []
    categories = []
    for string in strings:
        a = analyse.analyse(string)
        analyzed.append(a)
        categories.append(a.get_categories())

    uber_analyzed = multi_nw_algorithm(map(translate_categories, categories), 
        gap=custom_gap, null=[], concatenate=prepend)

    generated = factorize(analyzed, uber_analyzed, categories, custom_gap)
    return generated
def main():
    id_ = args.jobid
    n = args.nEvents if args.nEvents else 100000
    # TODO read total number from muon file directly OR
    # TODO always pass from steering process?

    with tempfile.NamedTemporaryFile() as t:
        outFile = t.name
        generate(args.input, args.geofile, n, outFile, args.lofi)
        chain = r.TChain('cbmsim')
        chain.Add(outFile)
        xs = analyse(chain, args.results)
        res = r.TFile.Open(args.results, 'update')
        res.WriteObject(xs, 'results')
        res.Close()
    print 'Slave: Worker process {} done.'.format(id_)
Example #25
def netclamp_analyse(channel, GIDs, multiplier, make_plots=False):
    threshold = -30  #mV

    directory = "C:/Users/spand/OneDrive - University of Toronto/Year 4/Skinner Lab/Spiking Data/network_clamp_results/"
    directory += (channel + '/' + multiplier)

    file_list = []

    for gid in GIDs:
        file_list.append(directory + '/' + multiplier + '_mytrace_' + \
                         gid + '_soma.dat')

    freq_data = np.zeros((len(file_list), 4))
    isi_data = np.zeros((len(file_list), 4))

    if make_plots == True:
        fig, ax = plt.subplots(len(file_list), 1, sharex=True, sharey=True)
        fig.suptitle(channel + ': ' + multiplier)
        plt.ylabel('Voltage (mV)')
        plt.xlabel('Time (ms)')

    for i in range(len(file_list)):
        x, y = np.loadtxt(file_list[i], unpack=True, skiprows=1)
        if make_plots == True:
            ax[i].plot(x, y)
            ax[i].plot(x, x * 0 + threshold, 'r')
        # print (x,y)
        F, T = analyse(x, y)
        freq_data[i] = F
        isi_data[i] = T

    avg_freq_data = np.zeros(4)
    avg_isi_data = np.zeros(4)

    for i in range(4):
        avg_freq_data[i] = np.mean(freq_data[:, i])
        avg_isi_data[i] = np.mean(isi_data[:, i])

    print('f1 \t f2 \t f_avg \t f_sd')
    print(freq_data)
    print(avg_freq_data)
    print('t1 \t t2 \t t_avg \t t_sd')
    print(isi_data)
    print(avg_isi_data)

    if make_plots == True:
        plt.show()
Example #26
def parameters_grid_search(train_s, valid_s, method_name, kwargs):
    """
    methode name = string, name of the method (eg :"naiveBayes")
    kwargs = dictionnary of the parameters of the method: range to be tested
    """
    exp = 0
    kwargs_test = {}
    dTuning = {}
    for items in product(*kwargs.values()):
        for i, key in enumerate(kwargs.keys()):
            kwargs_test[key] = items[i]

        d = analyse.analyse(train_s, valid_s, method_name, kwargs_test)

        dTuning[exp]= d
        exp += 1

    return dTuning
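
A self-contained illustration of the same grid expansion with itertools.product; analyse is replaced by a stub that just echoes the parameter combination, and the parameter ranges are hypothetical.

from itertools import product

def analyse_stub(train_s, valid_s, method_name, kwargs):
    return dict(kwargs)   # stand-in for analyse.analyse

kwargs = {'n_trees': [10, 50], 'max_depth': [3, 5]}
dTuning = {}
for exp, items in enumerate(product(*kwargs.values())):
    kwargs_test = dict(zip(kwargs.keys(), items))
    dTuning[exp] = analyse_stub(None, None, 'randomForest', kwargs_test)
print(dTuning)   # four combinations: {0: {'n_trees': 10, 'max_depth': 3}, ...}
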
def analyseQuery():
    term = request.form.get('twitter_query', '')
    if len(term) == 0:
        return analyse_err("You must add a search query")
    
    country = request.form.get('countryDataset', 'global')
    resultitem, err = analyse(term, country)
    
    if resultitem == "noHashorAt":
        return analyse_err("You must enter a #tag or @user, please try again")
    
    elif resultitem == "noTweetsFound":
        return analyse_err("No tweets found for this query, please try again")
    
    if err != None:
        flash("Analysing fewer than 20 tweets will lead to less accurate results. Only "+str(resultitem.tweetsetInfo.tweet_count)+" tweets analysed for "+str(resultitem.tweetsetInfo.term),'info')
        
    return render_template('tabs/analyse_tweets.html', result=resultitem)   
Example #28
def reflash_today():
    global today, today_danmu, status, lasttime, user_ip, lastsize
    while True:
        today = time.strftime("%Y-%m-%d", time.localtime())
        status = requests.get(
            'https://api.live.bilibili.com/room/v1/Room/room_init?id=801580'
        ).json()
        flash_lock.acquire()
        now_time = time.time()
        if (now_time - lasttime > 300):
            user_ip = {}
            lasttime = now_time
        reflashflag = False
        nowsize = rd.dbsize()
        if len(user_ip) > 0 and (nowsize != lastsize
                                 or status['data']['live_status'] == 1):
            today_danmu = search_danmu(today, True)
            reflashflag = True
            lastsize = nowsize

        flash_lock.release()

        #print(status['data'])
        top, hantalk = analyse()
        os.system("clear")
        board = "当前在线dd:\n"
        for iip in user_ip:
            board = board + ' ' + str(iip) + ' ' + str(
                user_ip[iip]['cname']) + ' ' + str(
                    user_ip[iip]['date']) + user_ip[iip]['mode'] + '\n'
        print(board)
        print('\n常规更新 数据库变动:', reflashflag, '开播模式:',
              status['data']['live_status'], '更新时间',
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        if status['data']['live_status'] == 1:
            time.sleep(6)
        elif reflashflag == True:
            time.sleep(9)
        else:
            time.sleep(19)
Example #29
    def analyse(self):
        
        #exception handling
        try:
            fname = self.fname
            if not fname:
                raise IOError('File name is empty')
            
        except Exception as e:
            print e
            tkMessageBox.showwarning('File error', 'No file imported!')
            return

        try:
            sname = str(self.sheetbox.get(self.sheetbox.curselection()))
        except Exception as e:
            print e
            tkMessageBox.showwarning('Sheet selection', 'Please select a sheet')
            return
        
        #check binning options
        if self.bvar.get() == 1:
            nbins = None
        else:
            bin_entry = self.bentry.get()
            try:
                nbins = int(bin_entry)
            except ValueError as e:
                tkMessageBox.showwarning('Binning error', 'Please enter a valid bin number')
                return 
            except Exception as e:
                print e
                tkMessageBox.showwarning('Binning error', 'Please check your binning options')
                return

        #analyse data and display results
        results = ana.analyse(fname, sname, nbins)
        out_text = '\n'.join('{} {}'.format(*x) for x in zip(self.outstrs, results.values()))
        self.output_text.set(out_text)
Example #30
def predesigned_network(network_type):
    print "running mode:" + network_type
    prompt = ">"
    print "What kind of operation you want to run?"
    print "#1 Run a single experiment;"
    print "#2 Run a batched experiment;"
    print "#3 analyse existing experimental results or doing further experiments on existing data"
    if_batch = int(raw_input(prompt))
    if if_batch == 1:
        run_single(0, network_type)
    elif if_batch == 2:
        print "#1: fixed CNN, different ratio; #2:..."
        run_type = int(raw_input(prompt))
        if run_type == 1:
            os.system('clear')
            print "============================================================================================"
            print "Enter a sery of numbers of the ratio of training samples, end with an 'e' or 'end',"
            print "if you want to use the default sequence 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90, enter an 'a' or 'all':"
            ratios = []
            temp_ratio = raw_input(prompt)
            if temp_ratio == 'a' or temp_ratio == 'all':
                temp_ratio = [
                    1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70,
                    75, 80, 85, 90
                ]
            else:
                while temp_ratio != 'e' and temp_ratio != 'end':
                    ratios.append(int(temp_ratio))
                    temp_ratio = raw_input(prompt)
            #ratios = temp_ratio
            print ratios
            #def run_batch(learning_ratio):
            #            mix_model_svm_ratio = 0
            #            file_name, neighbors = data_util.prepare(learning_ratio)
            #            print "now gathering the parameters of the network..."
            #            neighbors = neighbors + 1
            #            print "the neighbors strategy is: " + str(neighbors)
            print "enter the dataset name:"
            dataset_fixed = raw_input(prompt)
            print "enter the neighbor strategy, choose from 1, 4, or 8, end with an 'e' or 'end'. if you want to run on all the strategies, enter an 'a' or 'all' for all 1,4,8 strategies."
            temp_strategies_list = []
            temp_strategy_input = raw_input(prompt)
            if temp_strategy_input == 'a' or temp_strategy_input == 'all':
                temp_strategies_list = [1, 4, 8]
            else:
                while temp_strategy_input != 'e' and temp_strategy_input != 'end':
                    temp_strategies_list.append(int(temp_strategy_input))
                    temp_strategy_input = raw_input(prompt)
            #strategy_fixed = raw_input(prompt)

            os.system('clear')
            print "Now gathering network configuration parameters for prior proposed Cube CNN...."
            print "--------------------------------------------------------------------------------------------"
            print "enter the number of convolutional neurons:"
            neurons = int(raw_input(prompt))
            print "enter the number of layers you want the CNN to operate convolutional operation:"
            neuronLayersCount = int(raw_input(prompt))
            print "enter the kernel size of the maxpooling layer:"
            maxpoolings = int(raw_input(prompt))
            print "enter the number of full layers\' neurons, default is 100:"
            fullLayers = int(raw_input(prompt))
            print "enter the batch size for bsgd:"
            batch_size = int(raw_input(prompt))
            print "enter the learning ratio:"
            learning = float(raw_input(prompt))
            print "enter the train decay:"
            train_decay = float(raw_input(prompt))
            print "enter the epoches you want the network to be trained:"
            epoches = int(raw_input(prompt))
            print "now choose the following strategy after the cnn network been trained:"
            print "#1:train a cnn-svm joint framework;"
            print "#2:train a cnn-rf joint framework;"
            print "#3:train both cnn-svm and cnn-rf joint frameworks;"
            #if network_type == '3':
            #    print "#4:compare the cube cnn with the new hic framework;"
            #    print "#5:TODO: train a mix assemble cnn-classifier model."
            #elif network_type == '1'
            print "#4:TODO: train a mix assemble cnn-classifier model."
            if network_type == '3':
                print "#5: run and compare the cube cnn with the new hic framework"
            following_strategy = int(raw_input(prompt))
            if network_type == '1' and following_strategy == 4:
                print "enter the ratio of svm classifier:"
                mix_model_svm_ratio = int(raw_input(prompt))
            trees = 0
            if following_strategy == 2 or following_strategy == 3:
                print "enter the count of trees you want to set in Random Forest:"
                trees = int(raw_input(prompt))

            #if network_type == '3' and following_strategy == 4:
            #    print "Now gathering parameter for hic network:"

            print "How many individual experiments want to take?"
            experiment_times = raw_input(prompt)

            for time_counts in range(int(experiment_times)):

                ltime = time.localtime()
                time_stamp = str(ltime[0]) + "#" + str(ltime[1]) + "#" + str(
                    ltime[2]) + "#" + str(ltime[3]) + "#" + str(ltime[4])

                file = open(
                    "../experiments/BatchExpsFixedCNN_" + time_stamp + ".txt",
                    'w')
                resultFile = open(
                    "../experiments/BatchResults_" + time_stamp + ".txt", 'w')
                file.write("======== Experimental Folders ==========\n")
                resultFile.write(
                    "=============== Batch Exprimental Results ===============\n"
                )
                resultFile.write(
                    "=========================================================\n"
                )

                #strategiesList = []
                #if str(strategy_fixed) == 'a' or strategy_fixed == 'all':
                #    strategiesList = [1,4,8]
                #else:
                #    strategiesList = [int(strategy_fixed)]
                #
                strategiesList = temp_strategies_list
                for neighbor_strategy_mark in range(len(strategiesList)):
                    neighbor_strategy = strategiesList[neighbor_strategy_mark]
                    print "now is running on strategy " + str(
                        neighbor_strategy)
                    file.write("~~~~~~~~~~~~~~~ Neighbors Strategies:" +
                               str(neighbor_strategy) + " ~~~~~~~~~~~~~~~\n")
                    for temp_mark in range(len(ratios)):
                        learning_ratio = 0
                        train_decay_inner = 0
                        batch_size_inner = 0
                        if ratios[temp_mark] < 10:
                            learning_ratio = learning / 10
                            train_decay_inner = train_decay / 10
                            batch_size_inner = batch_size / 10
                        #elif ratios[temp_mark] < 5:
                        #    learning_ratio = learning / 100
                        #    train_decay_inner = train_decay / 100
                        #    batch_size_inner = batch_size / 100
                        else:
                            learning_ratio = learning
                            train_decay_inner = train_decay
                            batch_size_inner = batch_size

                        #set the full layers nodes to satisfy the change of neighbors strategies.
                        #TODO: need to check if this makes sense
                        #actual_full_layers = 0
                        #if neighbor_strategy == 4:
                        #    actual_full_layers = fullLayers / 2
                        #elif neighbor_strategy == 1:
                        #    actual_full_layers = fullLayers / 4
    #                    for time_counts in range(int(experiment_times)):
                        file_name = run_batch(dataset_fixed, neighbor_strategy,
                                              neurons, neuronLayersCount,
                                              maxpoolings, fullLayers,
                                              batch_size_inner, learning_ratio,
                                              train_decay_inner, epoches,
                                              following_strategy, trees,
                                              ratios[temp_mark], 2)
                        #file_name = run_single(ratitemp_mark])
                        file.write(file_name + "\n")
                        fileCNNRFResultsPath = file_name + "_CNNRFdescription.txt"
                        if following_strategy == 3:
                            fileCNNSVMResultsPath = file_name + "CNNSVMdescription.txt"
                        resultFile.write(
                            "=========================================================\n"
                        )
                        resultFile.write(file_name + "\n")
                        inputFileRF = open(fileCNNRFResultsPath, "r")
                        if following_strategy == 3:
                            inputFileSVM = open(fileCNNSVMResultsPath, "r")
                        allLinesRF = inputFileRF.readlines()
                        if following_strategy == 3:
                            allLinesSVM = inputFileSVM.readlines()
                        resultFile.write("CNN-RF Results:\n")
                        for eachLine in allLinesRF:
                            resultFile.write(eachLine)
                        resultFile.write(
                            "-----------------------------------------\n")
                        if following_strategy == 3:
                            resultFile.write("CNN-SVM Results:\n")
                            for eachLine in allLinesSVM:
                                resultFile.write(eachLine)
                            inputFileRF.close()
                            inputFileSVM.close()
                        resultFile.write(
                            "##################################################\n"
                        )
                    #file.close()
                resultFile.close()
                print "The results are stored in the file " + "BatchResults_" + time_stamp + ".txt"
                print "All folders contains the experiments are stored in the file " + "BatchExpsFixedCNN_" + time_stamp + ".txt"
    elif if_batch == 3:
        os.system('clear')
        analyse.analyse()
Example #31
async_mode = None
app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'

#socketio = SocketIO(app)

user_ip = {}
lasttime = time.time()
rd = redis.StrictRedis(host='localhost',
                       port=4514,
                       db=0,
                       decode_responses=True)
lastk = []
status = requests.get(
    'https://api.live.bilibili.com/room/v1/Room/room_init?id=801580').json()
top, hantalk = analyse()
lastsize = 0
# limiter.init_app(app)
today_danmu = ""


def main_server():
    #socketio.run(app, host='0.0.0.0', port=14514,debug=False)
    app.run(host='0.0.0.0', port=14514, debug=False, threaded=True)


def reflash_today():
    global today, today_danmu, status, lasttime, user_ip, lastsize
    while True:
        today = time.strftime("%Y-%m-%d", time.localtime())
        status = requests.get(
Example #32
import sys
from analyse import analyse
from display import print_word_info, print_error_message

if len(sys.argv) < 2:
    print_error_message(
        'Ooops, something went wrong. Have you supplied a word to be approximated?'
    )
    exit(1)

actual_arguments = sys.argv[1:]
word = ' '.join(actual_arguments)

try:
    info = analyse(word)
    print_word_info(info)
except:
    print_error_message('Ooops, something went wrong internally!')
Example #33
def main():
    ###############
    ### IMPORT ####
    ###############
    # Importation parameters:
    split= True
    normalize = True
    noise_var = 0.
    ratio_train = 0.9

    # Import the training data:
    print("Extracting the data sets...")
    start = time.clock()
    train_s, valid_s, test_s = tokenizer.extract_data(split= split,
                                                      normalize= normalize,
                                                      noise_variance= noise_var,
                                                      ratio_train= ratio_train)
    stop = time.clock()
    print ("Extraction time: %i s") %(stop-start)

    print(" ")
    print(" ")

    ######################
    ### PRE-TREATMENT ####
    ######################
    print("------------------------- Pre-treatment --------------------------")
    ### Average number of signal per subset:
    print("Train subsets signal average:")
    train_s_average = preTreatment.ratio_sig_per_dataset(train_s[2])
    print(" ")
    print("Valid subsets signal average:")
    valid_s_average = preTreatment.ratio_sig_per_dataset(valid_s[2])

    print(" ")
    print(" ")

    ############
    # ANALYSES #
    ############

    # Dictionary that will contain all the data for each method. In the end
    # we'll have a dict of dicts
    # Keys of the methods : {naiveBayes, svm, kNeighbors, lda, qda, adaBoost,
    #                       randomForest}
    dMethods ={}
    # RANDOM FOREST:
    kwargs_rdf= {'n_trees': 50}
    dMethods['randomForest'] = analyse.analyse(train_s, valid_s, 'randomForest',
                                               kwargs_rdf)

    print(" ")

    ##################
    # POST-TREATMENT #
    ##################
    print("post treatment")
    yProba_s = dMethods['randomForest']['yProba_s']
    yPredicted_s = dMethods['randomForest']['yPredicted_s']

    for n in range(8):
        L = []
        for i in range(yPredicted_s[n].shape[0]):
            if yPredicted_s[n][i] == 1:
                L.append(yProba_s[n][i][1])

        L.sort(reverse = True)
        prob_limit = L[int(len(L)*0.45)]


        for i in range(yPredicted_s[n].shape[0]):
            if yProba_s[n][i][1] < prob_limit:
                yPredicted_s[n][i] = 0
            else:
                yPredicted_s[n][i] = 1

    # Numerical score:
    if type(yPredicted_s) == list:
        for i in range(len(yPredicted_s)):
            sum_s, sum_b = submission.get_numerical_score(yPredicted_s[i],
                                                          valid_s[2][i])
            print "Subset %i: %i elements - sum_s[%i] = %i - sum_b[%i] = %i" \
                    %(i, yPredicted_s[i].shape[0], i, sum_s, i, sum_b)
    
    # Get s and b for each group (s_s, b_s) and the final final_s and
    # final_b:
    final_s, final_b, s_s, b_s = submission.get_s_b_8(yPredicted_s, valid_s[2],
                                                  valid_s[3])

    # Balance the s and b
    final_s *= 250000/25000
    final_b *= 250000/25000
    # AMS final:
    AMS = hbc.AMS(final_s , final_b)
    print ("Expected AMS score for randomforest : %f") %AMS
    #AMS by group
    AMS_s = []
    for i, (s,b) in enumerate(zip(s_s, b_s)):
        s *= 250000/yPredicted_s[i].shape[0]
        b *= 250000/yPredicted_s[i].shape[0]
        score = hbc.AMS(s,b)
        AMS_s.append(score)
        print("Expected AMS score for randomforest :  for group %i is : %f" %(i, score))
    print(" ")

    
    ##############
    # SUBMISSION #
    ##############
    print("-------------------------- Submission ---------------------------")

    # Prediction on the test set:
    # method used for the submission
    # TODO: check that the method name has the right form
    # (build a list of valid method names)

    #method = "randomForest"

    #test_prediction_s, test_proba_s = eval(method).get_test_prediction(
    #                                            dMethods[method]['predictor_s'],
    #                                            test_s[1])

    test_prediction_s, test_proba_s = postTreatment.get_SL_test_prediction(
                                                dMethods, dSl, test_s[1])


    print("Test subsets signal average:")
    test_s_average = preTreatment.ratio_sig_per_dataset(test_prediction_s)
    print(" ")

    #RankOrder = np.arange(1,550001)

    if type(test_prediction_s) == list:
        test_prediction_s = np.concatenate(test_prediction_s)
        test_proba_s = np.concatenate(test_proba_s)
        RankOrder = postTreatment.rank_signals(test_proba_s)
        ID = np.concatenate(test_s[0])
    else:
        ID = test_s[0]

    # Create a submission file:
    sub = submission.print_submission(ID, RankOrder , test_prediction_s)

    return sub
def FitnessFunction(point, sample):
    try:
        tmpl = copy.deepcopy(config.RESULTS_TEMPLATE)
        params = point

        paramFile = '/eos/experiment/ship/user/ffedship/EA_V2/Shared/params' + str(
            sample) + '_{}.root'.format(create_id(params))
        geoinfoFile = paramFile.replace('params', 'geoinfo')
        heavy = '/eos/experiment/ship/user/ffedship/EA_V2/Shared/heavy' + str(
            sample) + '_{}'.format(create_id(params))
        lockfile = paramFile + '.lock'
        print heavy, lockfile
        if os.path.exists(geoinfoFile):
            geolockfile = geoinfoFile + '.lock'
            lock = filelock.FileLock(geolockfile)
            if not lock.is_locked:
                with lock:
                    with open(geoinfoFile, 'r') as f:
                        length, weight = map(float,
                                             f.read().strip().split(','))
                    tmpl['weight'] = weight
                    tmpl['length'] = length
        while not os.path.exists(paramFile) and not os.path.exists(heavy):
            lock = filelock.FileLock(lockfile)
            if not lock.is_locked:
                with lock:
                    tmpl['status'] = 'Acquired lock.'
                    tmp_paramFile = generate_geo(
                        paramFile.replace('.r', '.tmp.r'), params)
                    subprocess.call([
                        'python2',
                        '/afs/cern.ch/user/f/ffedship/private/EA_Muon_Shield_V2/get_geo.py',
                        '-g', tmp_paramFile, '-o', geoinfoFile
                    ])

                    shutil.move(
                        '/eos/experiment/ship/user/ffedship/EA_V2/Shared/' +
                        os.path.basename(tmp_paramFile),
                        paramFile.replace('shared',
                                          'output').replace('params', 'geo'))
                    with open(geoinfoFile, 'r') as f:
                        length, weight = map(float,
                                             f.read().strip().split(','))
                    tmpl['weight'] = weight
                    tmpl['length'] = length
                    shutil.move(
                        '/eos/experiment/ship/user/ffedship/EA_V2/Geometry/' +
                        os.path.basename(tmp_paramFile), paramFile)

                    tmpl['status'] = 'Created geometry.'
                    print "Fitness Function Message: Geometry has been generated using config ", point
                    print "Fitness Function Message: Length ", length
                    print "Fitness Function Message: Weight ", weight
            else:
                sleep(60)
        outFile = root_output_name

        tmpl['status'] = 'Simulating...'
        generate(inputFile=root_input_name,
                 paramFile=paramFile,
                 outFile=root_output_name,
                 seed=1,
                 nEvents=10000)

        tmpl['status'] = 'Analysing...'
        chain = r.TChain('cbmsim')
        chain.Add(outFile)
        xs = analyse(chain, 'hists.root')
        tmpl['muons'] = len(xs)
        tmpl['muons_w'] = sum(xs)
        print "muons: ", tmpl['muons']
        print "muons_w: ", tmpl['muons_w']
        print "Fitness", FCN(tmpl['weight'], np.array(xs), tmpl['length'])[0]
        XS_output = open(csv_output_name, "w")
        XS_write = csv.writer(XS_output)
        XS_write.writerow([tmpl['weight'], tmpl['length'], tmpl['muons_w']])
        XS_output.close()
        tmpl['error'] = None
        tmpl['status'] = 'Done.'
        os.remove(root_output_name)
    except:
        print "EA_LL_FCN Message: Wrong geometry, operation rejected, negative values assigned"
        XS_output = open(csv_output_name, "w")
        XS_write = csv.writer(XS_output)
        XS_write.writerow([100000000, 10000000, 100000000])
        XS_output.close()
Exemple #35
0
def predesigned_network(network_type):
    print "running mode:" + network_type
    prompt = ">"
    print "What kind of operation you want to run?"
    print "#1 Run a single experiment;" 
    print "#2 Run a batched experiment;"
    print "#3 analyse existing experimental results or doing further experiments on existing data"
    if_batch = int(raw_input(prompt))
    if if_batch == 1:
        run_single(0, network_type)
    elif if_batch == 2:
        print "#1: fixed CNN, different ratio; #2:..."
        run_type = int(raw_input(prompt))
        if run_type == 1:
            os.system('clear')
            print "============================================================================================"
            print "Enter a sery of numbers of the ratio of training samples, end with an 'e' or 'end',"
            print "if you want to use the default sequence 1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90, enter an 'a' or 'all':"
            ratios = []
            temp_ratio = raw_input(prompt)
            if temp_ratio == 'a' or temp_ratio == 'all':
                temp_ratio = [1,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90]
            else:
                while temp_ratio != 'e' and temp_ratio != 'end':
                    ratios.append(int(temp_ratio))
                    temp_ratio = raw_input(prompt)
            #ratios = temp_ratio
            print ratios
#def run_batch(learning_ratio):
#            mix_model_svm_ratio = 0
#            file_name, neighbors = data_util.prepare(learning_ratio)
#            print "now gathering the parameters of the network..."
#            neighbors = neighbors + 1
#            print "the neighbors strategy is: " + str(neighbors)
            print "enter the dataset name:"
            dataset_fixed = raw_input(prompt)
            print "enter the neighbor strategy, choose from 1, 4, or 8, end with an 'e' or 'end'. if you want to run on all the strategies, enter an 'a' or 'all' for all 1,4,8 strategies."
            temp_strategies_list = []
            temp_strategy_input = raw_input(prompt)
            if temp_strategy_input == 'a' or temp_strategy_input == 'all':
                temp_strategies_list = [1,4,8]
            else:
                while temp_strategy_input != 'e' and temp_strategy_input != 'end':
                    temp_strategies_list.append(int(temp_strategy_input))
                    temp_strategy_input = raw_input(prompt)
            #strategy_fixed = raw_input(prompt)

            os.system('clear')
            print "Now gathering network configuration parameters for prior proposed Cube CNN...."
            print "--------------------------------------------------------------------------------------------"
            print "enter the number of convolutional neurons:"
            neurons = int(raw_input(prompt))
            print "enter the number of layers you want the CNN to operate convolutional operation:"
            neuronLayersCount = int(raw_input(prompt))
            print "enter the kernel size of the maxpooling layer:"
            maxpoolings = int(raw_input(prompt))
            print "enter the number of full layers\' neurons, default is 100:"
            fullLayers = int(raw_input(prompt))
            print "enter the batch size for bsgd:"
            batch_size = int(raw_input(prompt))
            print "enter the learning ratio:"
            learning = float(raw_input(prompt))
            print "enter the train decay:"
            train_decay = float(raw_input(prompt))
            print "enter the epoches you want the network to be trained:"
            epoches = int(raw_input(prompt))
            print "now choose the following strategy after the cnn network been trained:"
            print "#1:train a cnn-svm joint framework;"
            print "#2:train a cnn-rf joint framework;"
            print "#3:train both cnn-svm and cnn-rf joint frameworks;"
            #if network_type == '3':
            #    print "#4:compare the cube cnn with the new hic framework;"
            #    print "#5:TODO: train a mix assemble cnn-classifier model."
            #elif network_type == '1'
            print "#4:TODO: train a mix assemble cnn-classifier model."
            if network_type == '3':
                print "#5: run and compare the cube cnn with the new hic framework"
            following_strategy = int(raw_input(prompt))
            if network_type == '1' and following_strategy == 4:
                print "enter the ratio of svm classifier:"
                mix_model_svm_ratio = int(row_input(prompt))
            tress = 0
            if following_strategy == 2 or following_strategy == 3:
                print "enter the count of trees you want to set in Random Forest:"
                trees = int(raw_input(prompt))
            
            #if network_type == '3' and following_strategy == 4:
            #    print "Now gathering parameter for hic network:"

            
            print "How many individual experiments want to take?"
            experiment_times =  raw_input(prompt)
        
            for time_counts in range(int(experiment_times)):

                ltime = time.localtime()
                time_stamp = str(ltime[0]) + "#" + str(ltime[1]) + "#" + str(ltime[2]) + "#" + str(ltime[3]) + "#" + str(ltime[4])

                file = open("../experiments/BatchExpsFixedCNN_" + time_stamp + ".txt", 'w')
                resultFile = open("../experiments/BatchResults_" + time_stamp + ".txt", 'w')
                file.write("======== Experimental Folders ==========\n")
                resultFile.write("=============== Batch Exprimental Results ===============\n")
                resultFile.write("=========================================================\n")
                
                #strategiesList = []
                #if str(strategy_fixed) == 'a' or strategy_fixed == 'all':
                #    strategiesList = [1,4,8]
                #else:
                #    strategiesList = [int(strategy_fixed)]
                # 
                strategiesList = temp_strategies_list
                for neighbor_strategy_mark in range(len(strategiesList)):
                    neighbor_strategy = strategiesList[neighbor_strategy_mark]
                    print "now is running on strategy " + str(neighbor_strategy)
                    file.write("~~~~~~~~~~~~~~~ Neighbors Strategies:" + str(neighbor_strategy) +" ~~~~~~~~~~~~~~~\n")
                    for temp_mark in range(len(ratios)):
                        learning_ratio = 0
                        train_decay_inner = 0
                        batch_size_inner = 0
                        if ratios[temp_mark] < 10:
                            learning_ratio = learning / 10
                            train_decay_inner = train_decay / 10
                            batch_size_inner = batch_size / 10
                        #elif ratios[temp_mark] < 5:
                        #    learning_ratio = learning / 100
                        #    train_decay_inner = train_decay / 100
                        #    batch_size_inner = batch_size / 100
                        else:
                            learning_ratio = learning
                            train_decay_inner = train_decay
                            batch_size_inner = batch_size
        
                        #set the full layers nodes to satisfy the change of neighbors strategies.
                        #TODO: need to check if this makes sense
                        #actual_full_layers = 0
                        #if neighbor_strategy == 4:
                        #    actual_full_layers = fullLayers / 2
                        #elif neighbor_strategy == 1:
                        #    actual_full_layers = fullLayers / 4
    #                    for time_counts in range(int(experiment_times)):
                        file_name = run_batch(dataset_fixed,neighbor_strategy, neurons, neuronLayersCount, maxpoolings,fullLayers, batch_size_inner, learning_ratio, train_decay_inner, epoches, following_strategy, trees, ratios[temp_mark], 2)
                        #file_name = run_single(ratitemp_mark])
                        file.write(file_name + "\n")
                        fileCNNRFResultsPath = file_name + "_CNNRFdescription.txt"
                        if following_strategy == 3:
                            fileCNNSVMResultsPath = file_name + "CNNSVMdescription.txt"
                        resultFile.write("=========================================================\n")
                        resultFile.write(file_name + "\n")
                        inputFileRF = open(fileCNNRFResultsPath, "r")
                        if following_strategy == 3:
                            inputFileSVM = open(fileCNNSVMResultsPath, "r")
                        allLinesRF = inputFileRF.readlines()
                        if following_strategy == 3:
                            allLinesSVM = inputFileSVM.readlines()
                        resultFile.write("CNN-RF Results:\n")
                        for eachLine in allLinesRF:
                            resultFile.write(eachLine)
                        resultFile.write("-----------------------------------------\n")
                        if following_strategy == 3:
                            resultFile.write("CNN-SVM Results:\n")
                            for eachLine in allLinesSVM:
                                resultFile.write(eachLine)
                            inputFileSVM.close()
                        # Close the RF results file regardless of the following strategy.
                        inputFileRF.close()
                        resultFile.write("##################################################\n")
                file.close()
                resultFile.close()
                print "The results are stored in the file " + "BatchResults_" + time_stamp + ".txt"
                print "All folders contains the experiments are stored in the file " + "BatchExpsFixedCNN_" + time_stamp + ".txt"
    elif if_batch == 3:
        os.system('clear')
        analyse.analyse()
Exemple #36
0
    anadbprocess.start()
    print('Started analyse database process')

    timestamp = int(dt.datetime.now().timestamp())

    while control_flag.value == 0:
        if mbqueue.qsize() == 0:
            time.sleep(0.2)
        else:
            time1 = time.time()

            # get data from mbqueue
            pq_data, timestamp = mbqueue.get()

            # send data to analysis func
            frequency_10s, status_dict = ana.analyse(pq_data)

            # create dict for database insert and showing on website
            for addr in pq_data.index:
                if addr in live_ports:
                    livedatadict['port_' + str(addr)] = pq_data[addr]
                datadict['port_' + str(addr)] = pq_data[addr]

            # add primary key to every dict and frequency 10s
            datadict['timestamp'] = timestamp
            datadict['frequency_10s'] = frequency_10s

            # insert data in dbqueue
            dbqueue.put(datadict)

            # create data json
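
A minimal sketch of the inter-process wiring the loop above appears to assume: mbqueue feeds (pq_data, timestamp) tuples in, dbqueue carries result dicts out, and control_flag is a shared integer used as a stop signal. The names are taken from the snippet; only the standard multiprocessing primitives are certain here.

import multiprocessing as mp

def make_channels():
    # Hypothetical setup of the queues and flag consumed by the loop above.
    mbqueue = mp.Queue()             # measurements -> analysis loop
    dbqueue = mp.Queue()             # analysis loop -> database writer
    control_flag = mp.Value('i', 0)  # set to 1 to make the while-loop exit
    return mbqueue, dbqueue, control_flag

def stop(control_flag):
    # Flip the shared flag; the loop above checks control_flag.value each pass.
    control_flag.value = 1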
Exemple #37
0
        writer.writerow(src[neuron_index])
    csvFile.close()


def mapping_process():

    assert SIM_TIME > 0
    spynnaker.setup(timestep=1)
    spynnaker.set_number_of_neurons_per_core(spynnaker.IF_curr_exp, 50)
    time_space = readData()
    pn_population = setupLayer_PN(time_space)
    kc_population = setupLayer_KC()
    kc_population.record(["spikes"])
    pn_kc_projection = setupProjection_PN_KC(pn_population, kc_population)
    spynnaker.run(SIM_TIME)
    neo = kc_population.get_data(variables=["spikes"])
    spikeData_original = neo.segments[0].spiketrains
    spynnaker.end()
    return spikeData_original


if __name__ == '__main__':

    NUMBER_OF_DATA = int(sys.argv[1])
    SIM_TIME = NUMBER_OF_DATA * 50  # default expose_time = 50 for now
    begin = time.time()
    spikeData_original = mapping_process()
    end = time.time()
    operation_time("***Whole process of Map.py", begin=begin, end=end)
    ana.analyse(spikeData_original)
Exemple #38
0
def search():
    passwd=request.args.get('passwd')
    return analyse(passwd)
valid_s = tuple(valid_s)
test_s  = tuple(test_s)


print(" ")
### Classifier
# Linear SVM:
dMethods = {}
kwargs_linearSVM= {'penalty': 'l2', 'loss': 'l2', 'dual': True, 'tol': 0.0001,
                   'C': 1.0, 'multi_class': 'ovr', 'fit_intercept': True,
                   'intercept_scaling': 1, 'class_weight': None, 'verbose': 0,
                   'random_state': None}

dMethods['linearSVM'] = analyse.analyse(train_s= train_s,
                                           train2_s= train_s_2,
                                           valid_s= valid_s,
                                           method_name = 'linearSVM',
                                           kwargs = kwargs_linearSVM)

print dMethods['linearSVM']['AMS_treshold_valid']

"""
if load_only == True:
    # Load learning:
    print(" ")
    sub_folder = 'unsupervised'
    load_dir = os.path.join(load_path,load_path,sub_folder)
    stack_AE = SAE.load(load_dir)

    reconstructed_layer_value, error = stack_AE.reconstruct(test_set_x)
    print("The error of the loaded network reconstruction is:  {0}".format(error.eval()), "%")
            for self.i in self.out:
                if (self.i["view"] == "pro"):
                    self.procount = self.procount + 1
                else:
                    self.negcount = self.negcount + 1
            print("Tweets for:", self.procount)
            print("Tweets against:", self.negcount)
            if self.procount > self.negcount:
                print("Twitter user are in favor of:", self.input)
            else:
                print("Twitter user are not in favor of:", self.input)
        elif (self.userAnswer == "4"):
            anas.twitPollCompare()
        elif (self.userAnswer == "5"):
            anas.outOldData()
        else:
            print("Plase enter a valid input (1,2,3,4,5).")
            go.menu()
        return 0


dis = display()
threading.Thread(target=dis.slider, args=("Connecting ", )).start()
twit = twitterAPI()
mong = mongo()
anas = analyse(mong.conn())
coll = collection(twit.authentigate(False), mong.conn())
go = Main(twit.authentigate(False), mong.conn())
dis.stop()
go.menu()  #calls the function that gets tweets and puts them in the DB
def highchart(request):
    analyse()
    template = loader.get_template('scholarship/highchart.html')
    return HttpResponse(template.render(request))
Exemple #42
0
def main():

    ###############
    ### IMPORT ####
    ###############
    # Importation parameters:
    split= True
    normalize = True
    noise_var = 0.
    ratio_train = 0.9

    # Import the training data:
    print("Extracting the data sets...")
    start = time.clock()
    train_s, valid_s, test_s = tokenizer.extract_data(split= split, \
                                                      normalize= normalize, \
                                                      noise_variance= noise_var, \
                                                      ratio_train= ratio_train)

    yValid_conca = preTreatment.concatenate_vectors(valid_s[2])
    weights_conca = preTreatment.concatenate_vectors(valid_s[3])

    stop = time.clock()
    print ("Extraction time: %i s") %(stop-start)

    print(" ")
    print(" ")

    # Create the elected vectors for each group (best AMS score)
    best_yPredicted_s = [np.zeros(valid_s[2][i].shape[0]) for i in range(8)]
    best_yProba_s = [np.zeros(valid_s[2][i].shape[0]) for i in range(8)]
    best_AMS_s = [0. for i in range(8)]
    best_method_s = [0 for i in range(8)]
    best_ratio_s = [0 for i in range(8)]
    best_AMS_1_method = 0.
    best_method = "methode"
    best_ratio = "0."

    ######################
    ### PRE-TREATMENT ####
    ######################
    print("------------------------- Pre-treatment --------------------------")
    ### Average number of signal per subset:
    print("Train subsets signal average:")
    train_s_average = preTreatment.ratio_sig_per_dataset(train_s[2])
    print(" ")
    print("Valid subsets signal average:")
    valid_s_average = preTreatment.ratio_sig_per_dataset(valid_s[2])

    print(" ")
    print(" ")

    ############
    # ANALYSES #
    ############

    # Dictionary that will contain all the data for each method. In the end
    # we'll have a dict of dicts.
    # Keys of the methods : {naiveBayes, svm, kNeighbors, lda, qda, adaBoost,
    #                       randomForest, gradientBoosting}
    dMethods ={}

    # NAIVE BAYES:

    kwargs_bayes = {}
    dMethods['naiveBayes'] =  analyse.analyse(train_s, valid_s, 'naiveBayes',
                                              kwargs_bayes)

    # SVM
    
    kwargs_svm ={}
    dMethods['svm'] = analyse.analyse(train_s, valid_s,'svm', kwargs_svm)
    

    # K NEIGHBORS
    kwargs_tuning_kn = {'n_neighbors': [20,50]}
    dTuning = tuningModel.parameters_grid_search(train_s, valid_s, 'kNeighbors',
                                             kwargs_tuning_kn)

    dMethods['kNeighbors'] = combineClassifiers.select_best_classifiers(dTuning, valid_s)
    
    # LDA
    kwargs_lda = {}
    dMethods['lda'] = analyse.analyse(train_s, valid_s, 'lda', kwargs_lda)
    # QDA
    kwargs_qda= {}
    dMethods['qda'] = analyse.analyse(train_s, valid_s, 'qda', kwargs_qda)

    # ADABOOST
    kwargs_ada= {   'n_estimators': 50,
                    'learning_rate': 1.,
                    'algorithm': 'SAMME.R',
                    'random_state':None}
    dMethods['adaBoost'] = analyse.analyse(train_s, valid_s, 'adaBoost',
                                           kwargs_ada)

    # RANDOM FOREST:
    kwargs_tuning_rdf = {'n_estimators': [10,50,100]}

    dTuning = tuningModel.parameters_grid_search(train_s, valid_s, 'randomForest',
                                             kwargs_tuning_rdf)

    dMethods['randomForest'] = combineClassifiers.select_best_classifiers(dTuning,
                                                                valid_s)

    # GRADIENT BOOSTING

    kwargs_gradB = {}

    dMethods['gradientBoosting'] = analyse.analyse(train_s, valid_s, 'gradientBoosting', kwargs_gradB)


    kwargs_tuning_gradB = {'loss': ['deviance'], 'learning_rate': [0.1],
                    'n_estimators': [100], 'subsample': [1.0],
                    'min_samples_split': [2], 'min_samples_leaf': [1],
                    'max_depth': [10], 'init': [None], 'random_state': [None],
                    'max_features': [None], 'verbose': [0]}

    dTuning = tuningModel.parameters_grid_search(train_s, valid_s,
                                                'gradientBoosting',
                                                kwargs_tuning_gradB)

    dMethods['gradientBoosting'] = combineClassifiers.select_best_classifiers(
                                                                dTuning,
                                                         valid_s)
    
    print(" ")

    ##################
    # POST-TREATMENT #
    ##################
    print("-------------------- Best overall combination --------------------")

    dCombine = combineClassifiers.select_best_classifiers(dMethods, valid_s)

    print("-------------------------- Thresholding --------------------------")

     # COMBINED CLASSIFIERS:
    f = open("Tests/test_treshold_combined.txt","w")

    yProba_s = dCombine['yProba_s']
    yPredicted_s = dCombine['yPredicted_s']
    #Let's concatenate the vectors
    yProba_conca = preTreatment.concatenate_vectors(yProba_s)
    yPredicted_conca = preTreatment.concatenate_vectors(yPredicted_s)

    # Best treshold global
    best_treshold = tresholding.best_treshold(yProba_conca, yValid_conca, weights_conca)
    yPredicted_treshold = tresholding.get_yPredicted_treshold(yProba_conca, best_treshold)

    s, b = submission.get_s_b(yPredicted_treshold, yValid_conca, weights_conca)
    s *= 10
    b *= 10
    ams = hbc.AMS(s,b)
    if ams > best_AMS_1_method:
        best_AMS_1_method = ams
        best_method = 'combined classifiers'  # the combined predictor, not a single base method
        best_ratio = best_treshold

    # Best treshold group by group
    for i in range(8):
        best_treshold = tresholding.best_treshold(yProba_s[i], valid_s[2][i], valid_s[3][i])
        yPredicted_s[i] = tresholding.get_yPredicted_treshold(yProba_s[i], best_treshold)
        s, b = submission.get_s_b(yPredicted_s[i], valid_s[2][i], valid_s[3][i])
        s *= 250000/yPredicted_s[i].shape[0]
        b *= 250000/yPredicted_s[i].shape[0]
        ams = hbc.AMS(s,b)
        if ams > best_AMS_s[i]:
            best_yPredicted_s[i] = yPredicted_s[i]
            best_yProba_s[i] = yProba_s[i]
            best_AMS_s[i] = ams
            best_method_s[i] = dCombine['method'][i]
            best_ratio_s[i] = best_treshold


    # FOR EACH METHOD:
    for method in dMethods:

        yProba_s = dMethods[method]['yProba_s']
        yPredicted_s = dMethods[method]['yPredicted_s']

        #Let's concatenate the vectors
        yProba_conca = preTreatment.concatenate_vectors(yProba_s)
        yPredicted_conca = preTreatment.concatenate_vectors(yPredicted_s)

        # Best treshold global
        best_treshold = tresholding.best_treshold(yProba_conca, yValid_conca, weights_conca)
        yPredicted_treshold = tresholding.get_yPredicted_treshold(yProba_conca, best_treshold)

        s, b = submission.get_s_b(yPredicted_treshold, yValid_conca, weights_conca)
        s *= 10
        b *= 10
        ams = hbc.AMS(s,b)
        if ams > best_AMS_1_method:
            best_AMS_1_method = ams
            best_method = str(method)
            best_ratio = best_treshold

        # Best treshold group by group
        for i in range(8):
            best_treshold = tresholding.best_treshold(yProba_s[i], valid_s[2][i],
                                                      valid_s[3][i])
            yPredicted_s[i] = tresholding.get_yPredicted_treshold(yProba_s[i],
                                                                  best_treshold)
            s, b = submission.get_s_b(yPredicted_s[i], valid_s[2][i],
                                      valid_s[3][i])
            s *= 250000/yPredicted_s[i].shape[0]
            b *= 250000/yPredicted_s[i].shape[0]
            ams = hbc.AMS(s,b)
            if ams > best_AMS_s[i]:
                best_yPredicted_s[i] = yPredicted_s[i]
                best_yProba_s[i] = yProba_s[i]
                best_AMS_s[i] = ams
                best_method_s[i] = str(method)
                best_ratio_s[i] = best_treshold

    # Let's concatenate the 8 vectors which perform the best on each of the
    # subgroups and threshold the result
    best_yPredicted_conca = preTreatment.concatenate_vectors(best_yPredicted_s)
    best_treshold_conca = tresholding.best_treshold(best_yPredicted_conca, yValid_conca, weights_conca)
    best_yPredicted_conca_treshold = tresholding.get_yPredicted_treshold(best_yPredicted_conca, best_treshold_conca)

    best_final_s, best_final_b, best_s_s, best_b_s = submission.get_s_b_8(best_yPredicted_s, valid_s[2], valid_s[3])
    best_s_treshold, best_b_treshold = submission.get_s_b(best_yPredicted_conca_treshold, yValid_conca, weights_conca)

    best_final_s *= 10
    best_final_b *= 10
    best_s_treshold *= 10
    best_b_treshold *= 10
    best_AMS = hbc.AMS(best_final_s, best_final_b)
    best_AMS_treshold = hbc.AMS(best_s_treshold, best_b_treshold)


    print "Best AMS using one of the methods : %f" %best_AMS_1_method
    print "    method : %s" %(str(method))
    print "    ratio : %f" %(best_ratio)
    print " "
    print "Best AMS final : %f" %best_AMS
    print "Best AMS final after final tresholding : %f" %best_AMS_treshold
    print "best ratio on the concatenated vector : %f" %best_treshold_conca
    print " "

    for n in range(8):
        print "Best AMS group %i: %f - method %s - ratio %f" \
                %(n, best_AMS_s[n], best_method_s[n], best_ratio_s[n])

    return best_yPredicted_s, valid_s
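
The thresholding passes above all follow the same pattern: scan candidate probability cuts, score each cut with hbc.AMS on the weighted validation labels, and keep the best one. A minimal sketch of such a scan, assuming the usual approximate-median-significance formula with a 10-event regularisation term and aligned 1-D numpy arrays for probabilities, labels and weights (the function names are illustrative, not the project's tresholding module):

import numpy as np

def ams(s, b, b_reg=10.0):
    # Approximate median significance with the standard regularisation term.
    return np.sqrt(2.0 * ((s + b + b_reg) * np.log(1.0 + s / (b + b_reg)) - s))

def scan_best_threshold(y_proba, y_valid, weights, n_steps=100):
    # Return the probability cut that maximises AMS on the validation set.
    # (The snippets above additionally rescale s and b to the full data-set
    # size before calling hbc.AMS.)
    best_cut, best_score = 0.5, -1.0
    for cut in np.linspace(0.0, 1.0, n_steps):
        y_pred = (y_proba >= cut).astype(int)
        s = weights[(y_pred == 1) & (y_valid == 1)].sum()
        b = weights[(y_pred == 1) & (y_valid == 0)].sum()
        score = ams(s, b)
        if score > best_score:
            best_cut, best_score = cut, score
    return best_cut, best_score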
Exemple #43
0
def main():
    ###############
    ### IMPORT ####
    ###############
    # Importation parameters:
    split= True
    normalize = True
    noise_var = 0.
    ratio_train = 0.9

    # Import the training data:
    print("Extracting the data sets...")
    start = time.clock()
    train_s, valid_s, test_s = tokenizer.extract_data(split= split,
                                                      normalize= normalize,
                                                      noise_variance= noise_var,
                                                      ratio_train= ratio_train)
    stop = time.clock()
    print ("Extraction time: %i s") %(stop-start)

    print(" ")
    print(" ")

    ######################
    ### PRE-TREATMENT ####
    ######################
    print("------------------------- Pre-treatment --------------------------")
    ### Average number of signal per subset:
    print("Train subsets signal average:")
    train_s_average = preTreatment.ratio_sig_per_dataset(train_s[2])
    print(" ")
    print("Valid subsets signal average:")
    valid_s_average = preTreatment.ratio_sig_per_dataset(valid_s[2])

    print(" ")
    print(" ")

    ############
    # ANALYSES #
    ############

    # Dictionary that will contain all the data for each method. In the end
    # we'll have a dict of dicts.
    # Keys of the methods : {naiveBayes, svm, kNeighbors, lda, qda, adaBoost,
    #                       randomForest}
    dMethods ={}

    # NAIVE BAYES:
    kwargs_bayes = {}
    dMethods['naiveBayes'] =  analyse.analyse(train_s, valid_s, 'naiveBayes',
                                              kwargs_bayes)

    # SVM
    """
    kwargs_tuning_svm ={'kernel': ["rbf", "poly"], 'C' : [0.025],
                        'probability': [True]}

    dTuning = tuningModel.parameters_grid_search(train_s, valid_s, 'svm',
                                             kwargs_tuning_svm)

    dMethods['svm'] = combineClassifiers.select_best_classifiers(dTuning,
                                                                    valid_s)
    """

    # K NEIGHBORS
    kwargs_tuning_kn = {'n_neighbors': [10,20]}
    dTuning = tuningModel.parameters_grid_search(train_s, valid_s, 'kNeighbors',
                                             kwargs_tuning_kn)

    dMethods['kNeighbors'] = combineClassifiers.select_best_classifiers(dTuning,
                                                                        valid_s)

    # LDA
    kwargs_lda = {}
    dMethods['lda'] = analyse.analyse(train_s, valid_s, 'lda', kwargs_lda)
    # QDA
    kwargs_qda= {}
    dMethods['qda'] = analyse.analyse(train_s, valid_s, 'qda', kwargs_qda)


    # ADABOOST
    kwargs_ada= {'n_estimators': 50,
                 'learning_rate': 1.0, 'algorithm': 'SAMME.R',
                 'random_state': None}
    #kwargs_ada = {}

    dMethods['adaBoost'] = analyse.analyse(train_s, valid_s, 'adaBoost',
                                            kwargs_ada)

    # GRADIENT BOOSTING:
    kwargs_tuning_gradB = {'loss': ['deviance'], 'learning_rate': [0.1],
                    'n_estimators': [100,200], 'subsample': [1.0],
                    'min_samples_split': [2], 'min_samples_leaf':  [200],
                    'max_depth': [10], 'init': [None], 'random_state': [None],
                    'max_features': [None], 'verbose': [0]}

    dTuning = tuningModel.parameters_grid_search(train_s, valid_s,
                                                 'gradientBoosting',
                                                 kwargs_tuning_gradB)

    dMethods['gradientBoosting'] = combineClassifiers.select_best_classifiers(
                                                                dTuning,
                                                                valid_s)

    # RANDOM FOREST:
    kwargs_tuning_rdf = {'n_estimators': [10,20,50,100]}

    dTuning = tuningModel.parameters_grid_search(train_s, valid_s, 'randomForest',
                                                    kwargs_tuning_rdf)

    dMethods['randomForest'] = combineClassifiers.select_best_classifiers(dTuning,
                                                                          valid_s)


    print(" ")

    ##################
    # POST-TREATMENT #
    ##################
    print("------------------------ Post Treatment -----------------------")

    d = combineClassifiers.select_best_classifiers(dMethods, valid_s)

    print (" ")
    for i in range(len(d['parameters'])):
        print "Best classifier for subset %i : " %i
        if type(d['method'][i]) == list:
            print d['method'][i][i], ": ", d['parameters'][i]
        else:
            print d['method'][i], ": ", d['parameters'][i]

    """
    ##############
    # SUBMISSION #
    ##############
    print("-------------------------- Submission ---------------------------")

    # Prediction on the test set:
    # method used for the submission
    # TODO: check that the method name has the correct form
    # (build a list of valid method names)

    #method = "randomForest"

    #test_prediction_s, test_proba_s = eval(method).get_test_prediction(
    #                                            dMethods[method]['predictor_s'],
    #                                            test_s[1])

    test_prediction_s, test_proba_s = onTopClassifier.get_SL_test_prediction(
                                                dMethods, dSl, test_s[1])


    print("Test subsets signal average:")
    test_s_average = preTreatment.ratio_sig_per_dataset(test_prediction_s)
    print(" ")

    #RankOrder = np.arange(1,550001)

    if type(test_prediction_s) == list:
        test_prediction_s = np.concatenate(test_prediction_s)
        test_proba_s = np.concatenate(test_proba_s)
        RankOrder = onTopClassifier.rank_signals(test_proba_s)
        ID = np.concatenate(test_s[0])
    else:
        ID = test_s[0]

    # Create a submission file:
    sub = submission.print_submission(ID, RankOrder , test_prediction_s)
    """
    return d
Exemple #44
0
    'penalty': 'l2',
    'loss': 'l2',
    'dual': True,
    'tol': 0.0001,
    'C': 1.0,
    'multi_class': 'ovr',
    'fit_intercept': True,
    'intercept_scaling': 1,
    'class_weight': None,
    'verbose': 0,
    'random_state': None
}

dMethods['linearSVM'] = analyse.analyse(train_s=train_s,
                                        train2_s=train_s_2,
                                        valid_s=valid_s,
                                        method_name='linearSVM',
                                        kwargs=kwargs_linearSVM)

print dMethods['linearSVM']['AMS_treshold_valid']
"""
if load_only == True:
    # Load learning:
    print(" ")
    sub_folder = 'unsupervised'
    load_dir = os.path.join(load_path,load_path,sub_folder)
    stack_AE = SAE.load(load_dir)

    reconstructed_layer_value, error = stack_AE.reconstruct(test_set_x)
    print("The error of the loaded network reconstruction is:  {0}".format(error.eval()), "%")
"""
Exemple #45
0
        dMethods['naiveBayes'] =  analyse.analyse(train_s= train_RM_s,
                                                  train2_s= train_RM_s_2,
                                                  valid_s= valid_RM_s,
                                                  method_name = 'naiveBayes',
                                                  kwargs = kwargs_bayes)
        """
        # SVM
        """
        kwargs_svm ={}
        dMethods['svm'] = analyse.analyse(train_s, valid_s,'svm', kwargs_svm)
        """
        # K NEIGHBORS
        kwargs_kn = {'n_neighbors': 20}
        dMethods['kNeighbors_RM_' + str(n_removeFeatures)] = analyse.analyse(
                                                 train_s= train_RM_s,
                                                 train2_s= train_RM_s_2,
                                                 valid_s= valid_RM_s,
                                                 method_name= 'kNeighbors',
                                                 kwargs= kwargs_kn)
        """
        # LDA
        kwargs_lda = {}
        dMethods['lda'] = analyse.analyse(train_s= train_RM_s,
                                          train2_s= train_RM_s_2,
                                          valid_s= valid_RM_s,
                                          method_name = 'lda',
                                          kwargs = kwargs_lda)

        # QDA
        kwargs_qda= {}
        dMethods['qda'] = analyse.analyse(train_s= train_RM_s,
                                          train2_s= train_RM_s_2,
Exemple #46
0
#!/usr/bin/env python3
#
#  Copyright (c) 2014 Paul Gerrard
#  This program is free software.
#  license: GNU General Public License version 3
#
#  This code is an example from `Lean Python`: http://leanpy.com/
#
from analyse import analyse

numlist = []

while True:
    nextnum = input('Enter a number or blank line:')
    if len(nextnum) == 0:
        break
#
#   try and obtain a floating point number from the input
#
    try:
        num = float(nextnum)
        numlist.append(num)
    except:
        print(nextnum, 'is not numeric')

nmin, nmax, navg, nsum = analyse(numlist)

print(nmin, nmax, navg, nsum)
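
The snippet above relies on analyse() returning a (minimum, maximum, average, sum) tuple, as the unpacking on the previous line shows. The book's actual module is not reproduced here, but a minimal implementation consistent with that call might look like this (the empty-list behaviour is an assumption):

def analyse(numlist):
    # Assumed contract: return (min, max, average, sum) of a list of numbers.
    if not numlist:
        return None, None, None, 0.0
    nsum = sum(numlist)
    return min(numlist), max(numlist), nsum / len(numlist), nsum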


Exemple #47
0
def main():
    ###############
    ### IMPORT ####
    ###############
    # Importation parameters:
    split= True
    normalize = True
    noise_var = 0.
    ratio_train = 0.9

    # Import the training data:
    print("Extracting the data sets...")
    start = time.clock()
    train_s, valid_s, test_s = tokenizer.extract_data(split = split,
                                                      normalize = normalize,
                                                      noise_variance = 0.,
                                                      #n_classes = "multiclass",
                                                      n_classes = "binary",
                                                      train_size = 200000,
                                                      train_size2 = 0,
                                                      valid_size = 50000)

    stop = time.clock()
    print ("Extraction time: %i s") %(stop-start)

    print train_s[4]

    print(" ")
    print(" ")

    ######################
    ### PRE-TREATMENT ####
    ######################
    print("------------------------- Pre-treatment --------------------------")
    ### Average number of signal per subset:
    print("Train subsets signal average:")
    train_s_average = preTreatment.ratio_sig_per_dataset(train_s[2])
    print(" ")
    print("Valid subsets signal average:")
    valid_s_average = preTreatment.ratio_sig_per_dataset(valid_s[2])

    print(" ")
    print(" ")

    ############
    # ANALYSES #
    ############

    # Dictionary that will contain all the data for each method. In the end
    # we'll have a dict of dicts.
    # Keys of the methods : {naiveBayes, svm, kNeighbors, lda, qda, adaBoost,
    #                       randomForest}
    dMethods ={}

    # NAIVE BAYES:
    kwargs_bayes = {}
    dMethods['naiveBayes'] =  analyse.analyse(train_s, valid_s, 'naiveBayes',
                                              kwargs_bayes)
    # SVM
    """
    kwargs_svm ={}
    dMethods['svm'] = analyse.analyse(train_s, valid_s,'svm', kwargs_svm)
    """
    # K NEIGHBORS
    kwargs_kn = {'n_neighbors':50}
    dMethods['kNeighbors'] = analyse.analyse(train_s, valid_s, 'kNeighbors',
                                             kwargs_kn)

    # LDA
    kwargs_lda = {}
    dMethods['lda'] = analyse.analyse(train_s, valid_s, 'lda', kwargs_lda)
    # QDA
    kwargs_qda= {}
    dMethods['qda'] = analyse.analyse(train_s, valid_s, 'qda', kwargs_qda)

    # ADABOOST
    kwargs_ada= {   'base_estimators': None,
                    'n_estimators': 50,
                    'learning_rate': 1.,
                    'algorithm': 'SAMME.R',
                    'random_state':None}
    dMethods['adaBoost'] = analyse.analyse(train_s, valid_s, 'adaBoost',
                                           kwargs_ada)

    # RANDOM FOREST:
    kwargs_rdf= {'n_trees': 10}
    dMethods['randomForest'] = analyse.analyse(train_s, valid_s, 'randomForest',
                                               kwargs_rdf)

    # RANDOM FOREST 2:
    kwargs_rdf= {'n_trees': 100}
    dMethods['randomForest2'] = analyse.analyse(train_s, valid_s, 'randomForest',
                                               kwargs_rdf)
    # ADABOOST2
    kwargs_ada= {   'base_estimators': None,
                    'n_estimators': 100,
                    'learning_rate': .5,
                    'algorithm': 'SAMME.R',
                    'random_state':None}
    dMethods['adaBoost2'] = analyse.analyse(train_s, valid_s, 'adaBoost',
                                           kwargs_ada)


    print(" ")

    ##################
    # POST-TREATMENT #
    ##################
    print("------------------------ Merged predictor -----------------------")

    #ignore = ['randomForest2', 'randomForest']
    ignore = []

    final_prediction_s, dSl = onTopClassifier.SL_classification(dMethods, valid_s,
                                        train_s, method='svm', ignore = ignore)


    # Transform the probabilities in rank:
    #final_pred = postTreatment.rank_signals(final_pred)

    # Trunk the vectors

    for method in dMethods:
        yProba_s = dMethods[str(method)]['yProba_s']
        yPredicted_s = dMethods[str(method)]['yPredicted_s']

        yPredicted_treshold_s = postTreatment.proba_treshold(yPredicted_s, yProba_s, 0.5)

            # Numerical score:
        if type(yPredicted_s) == list:
            for i in range(len(yPredicted_s)):
                sum_s, sum_b = submission.get_numerical_score(yPredicted_s[i],
                                                          valid_s[2][i])
                print "Subset %i: %i elements - sum_s[%i] = %i - sum_b[%i] = %i" \
                        %(i, yPredicted_s[i].shape[0], i, sum_s, i, sum_b)

        # Get s and b for each group (s_s, b_s) and the final final_s and
        # final_b:
        final_s, final_b, s_s, b_s = submission.get_s_b_8(yPredicted_s, valid_s[2],
                                                  valid_s[3])

        # Balance the s and b
        final_s *= 250000/25000
        final_b *= 250000/25000
        # AMS final:
        AMS = hbc.AMS(final_s , final_b)
        print ("Expected AMS score for randomforest : %f") %AMS
        #AMS by group
        AMS_s = []
        for i, (s,b) in enumerate(zip(s_s, b_s)):
            s *= 250000/yPredicted_s[i].shape[0]
            b *= 250000/yPredicted_s[i].shape[0]
            score = hbc.AMS(s,b)
            AMS_s.append(score)
            print("Expected AMS score for randomforest :  for group %i is : %f" %(i, score))
        print(" ")


    ##############
    # SUBMISSION #
    ##############
    print("-------------------------- Submission ---------------------------")

    # Prediction on the test set:
    # method used for the submission
    # TODO: check that the method name has the correct form
    # (build a list of valid method names)

    #method = "randomForest"

    #test_prediction_s, test_proba_s = eval(method).get_test_prediction(
    #                                            dMethods[method]['predictor_s'],
    #                                            test_s[1])

    test_prediction_s, test_proba_s = onTopClassifier.get_SL_test_prediction(
                                                dMethods, dSl, test_s[1])


    print("Test subsets signal average:")
    test_s_average = preTreatment.ratio_sig_per_dataset(test_prediction_s)
    print(" ")

    #RankOrder = np.arange(1,550001)

    if type(test_prediction_s) == list:
        test_prediction_s = np.concatenate(test_prediction_s)
        test_proba_s = np.concatenate(test_proba_s)
        RankOrder = onTopClassifier.rank_signals(test_proba_s)
        ID = np.concatenate(test_s[0])
    else:
        ID = test_s[0]

    # Create a submission file:
    sub = submission.print_submission(ID, RankOrder , test_prediction_s)

    return sub
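
onTopClassifier.SL_classification above trains a second-level ("on-top") predictor on the outputs of the base methods; its implementation is not shown in this snippet. The general stacking idea it relies on can be sketched as follows, where the helper names and the use of scikit-learn's SVC are assumptions rather than the project's actual code:

import numpy as np
from sklearn.svm import SVC

def stack_base_probabilities(dMethods, ignore=()):
    # One column per base method: its concatenated per-sample signal probabilities.
    columns = [np.concatenate(dMethods[name]['yProba_s'])
               for name in sorted(dMethods) if name not in ignore]
    return np.column_stack(columns)

def fit_on_top_classifier(dMethods, y_true, ignore=()):
    # Fit the meta-classifier on the stacked probabilities; the snippet above
    # passes method='svm' for this step, hence the SVC here.
    X_meta = stack_base_probabilities(dMethods, ignore)
    clf = SVC(probability=True)
    clf.fit(X_meta, y_true)
    return clf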
def insert_null_events(timeout=60):
	import time
	from Queue import Empty
	while pool.call_queue.unfinished_tasks > 0:
		try:
			yield pool.returns.get(timeout=timeout)
		except Empty:
			yield (time.time(),)

out = file('live.log', 'a')
lastactive = time.time()
lastcheck = time.time()
print 'STARTUP'
startup()
for k, v in analyse(insert_null_events(timeout=10)):
	# store
	out.write("%s: %s\n" % (k, v))
	out.flush()
	print k, v
	
	# put logic here:
	# if currently inactive period -- check emails every 10 minutes
	# so that we are up to date when he comes back
	# also check facebook
	inactive = time.time() - lastactive > 10 * 60 and time.time() - lastcheck > 10 * 60
	
	if (inactive and v) or 'switchtabs' in v:
		# good time to interrupt!
		print 'DECIDING to interrupt user'
		interrupt_user()
Exemple #49
0
#encoding: UTF-8

import sys,os
from hashlib import sha256
import json
import analyse
import process
import remove

if not os.path.exists('config.py'): sys.exit(1)

result = ''

init_path = sys.argv[1] if len(sys.argv) > 1 else ''
if init_path != '' and os.path.exists(init_path):
	result = analyse.analyse(init_path)
	processed = process.process( result )
	remove.remove( result ,True)
else: 
	print 'i need a valid path'
	sys.exit(1)
Exemple #50
0
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--build", action='store_true', help="build pubmed articles vector index", required=False)
    parser.add_argument("--find", nargs="+", help="find article by keywords in index", required=False)
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    if args.find and args.build:
        print("Only one command can be passed")
    elif args.find:
        from analyse import worker as analyse
        keywords = " ".join(args.find)
        analyse(keywords)
    elif args.build:
        from build import worker as build
        build()
    exit(0)
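
For illustration, argparse accepts an explicit argv list, so the two flags above can be exercised without touching the real command line; this is a hypothetical quick check, not part of the original script:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--build", action='store_true', required=False)
parser.add_argument("--find", nargs="+", required=False)

args = parser.parse_args(["--find", "cancer", "genomics"])
print(args.build, args.find)   # -> False ['cancer', 'genomics']

args = parser.parse_args(["--build"])
print(args.build, args.find)   # -> True None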
Exemple #51
0
def main():
    ###############
    ### IMPORT ####
    ###############
    # Importation parameters:
    split= True
    normalize = True
    noise_var = 0.
    n_classes = "binary"
    train_size = 200000
    train_size2 = 25000
    valid_size = 25000


    # Import the training data:
    print("Extracting the data sets...")
    start = time.clock()
    train_s, train2_s, valid_s, test_s = tokenizer.extract_data(split= split,
                                                      normalize= normalize,
                                                      noise_variance= noise_var,
                                                      n_classes = n_classes,
                                                      train_size = train_size,
                                                      train_size2 = train_size2,
                                                      valid_size = valid_size)

    # Remerging the y and weights of the validation if necessary:
    if type(valid_s[2]) == list:
        yValid_conca = preTreatment.concatenate_vectors(valid_s[2])
        weights_conca = preTreatment.concatenate_vectors(valid_s[3])

    stop = time.clock()
    print ("Extraction time: %i s") %(stop-start)

    print(" ")
    print(" ")

    ######################
    ### PRE-TREATMENT ####
    ######################
    print("------------------------- Pre-treatment --------------------------")
    ### Average number of signal per subset:
    print("Train subsets signal average:")
    train_s_average = preTreatment.ratio_sig_per_dataset(train_s[2])
    print(" ")
    print("Valid subsets signal average:")
    valid_s_average = preTreatment.ratio_sig_per_dataset(valid_s[2])

    print(" ")
    print(" ")

    ############
    # ANALYSES #
    ############

    # Dictionary that will contain all the data for each method. In the end
    # we'll have a dict of dicts.
    # Keys of the methods : {naiveBayes, svm, kNeighbors, lda, qda, adaBoost,
    #                       randomForest}
    dMethods ={}

    # NAIVE BAYES:
    kwargs_bayes = {}
    dMethods['naiveBayes'] =  analyse.analyse(train_s= train_s, train2_s= train2_s,
                                              valid_s= valid_s,
                                              method_name = 'naiveBayes',
                                              kwargs = kwargs_bayes)
    # SVM
    """
    kwargs_svm ={}
    dMethods['svm'] = analyse.analyse(train_s, valid_s,'svm', kwargs_svm)
    """
    """
    # K NEIGHBORS
    kwargs_kn = {'n_neighbors':50}
    dMethods['kNeighbors'] = analyse.analyse(train_s, valid_s, 'kNeighbors',
                                             kwargs_kn)
    """
    # LDA
    kwargs_lda = {}
    dMethods['lda'] = analyse.analyse(train_s= train_s, train2_s= train2_s,
                                              valid_s= valid_s,
                                              method_name = 'lda',
                                              kwargs = kwargs_lda)

    # QDA
    kwargs_qda= {}
    dMethods['qda'] = analyse.analyse(train_s= train_s, train2_s= train2_s,
                                              valid_s= valid_s,
                                              method_name = 'qda',
                                              kwargs = kwargs_qda)
    """
    # ADABOOST
    kwargs_ada= {   'n_estimators': 50,
                    'learning_rate': 1.,
                    'algorithm': 'SAMME.R',
                    'random_state':None}
    dMethods['adaBoost'] = analyse.analyse(train_s, valid_s, 'adaBoost',
                                           kwargs_ada)
    """
    # RANDOM FOREST:
    kwargs_randomForest= {'n_estimators': 10}
    dMethods['randomForest'] = analyse.analyse(train_s= train_s, train2_s= train2_s,
                                              valid_s= valid_s,
                                              method_name = 'randomForest',
                                              kwargs = kwargs_randomForest)

    # RANDOM FOREST 2:
    kwargs_randomForest= {'n_estimators': 100}
    dMethods['randomForest2'] = analyse.analyse(train_s= train_s, train2_s= train2_s,
                                              valid_s= valid_s,
                                              method_name = 'randomForest',
                                              kwargs = kwargs_randomForest)
    """
    # ADABOOST2
    kwargs_ada= {   'n_estimators': 100,
                    'learning_rate': .5,
                    'algorithm': 'SAMME.R',
                    'random_state':None}
    dMethods['adaBoost2'] = analyse.analyse(train_s, valid_s, 'adaBoost',
                                           kwargs_ada)

    # RANDOM FOREST 3:
    kwargs_randomForest= {'n_estimators': 100}
    dMethods['randomForest3'] = analyse.analyse(train_s= train_s, train2_s= train2_s,
                                              valid_s= valid_s,
                                              method_name = 'randomForest',
                                              kwargs = kwargs_randomForest)

    # RANDOM FOREST 4:
    kwargs_randomForest= {'n_estimators': 100}
    dMethods['randomForest4'] = analyse.analyse(train_s= train_s, train2_s= train2_s,
                                              valid_s= valid_s,
                                              method_name = 'randomForest',
                                              kwargs = kwargs_randomForest)

    # RANDOM FOREST 5:
    kwargs_randomForest= {'n_estimators': 100}
    dMethods['randomForest5'] = analyse.analyse(train_s= train_s, train2_s= train2_s,
                                              valid_s= valid_s,
                                              method_name = 'randomForest',
                                              kwargs = kwargs_randomForest)

    # GRADIENT BOOSTING:
    kwargs_gradB = {'loss': 'deviance', 'learning_rate': 0.1,
                    'n_estimators': 100, 'subsample': 1.0,
                    'min_samples_split': 2, 'min_samples_leaf': 200,
                    'max_depth': 10, 'init': None, 'random_state': None,
                    'max_features': None, 'verbose': 0}

    dMethods['gradientBoosting'] = analyse.analyse(train_s, valid_s,
                                                'gradientBoosting', kwargs_gradB)
    """
    print(" ")

    ##################
    # POST-TREATMENT #
    ##################
    print("------------------------ Feaure importance: -----------------------")

    if type(dMethods['randomForest2']['predictor_s']) == list:
        for i,predictor_s in enumerate(dMethods['randomForest2']['predictor_s']):
            print "Subset %i:" %i
            print predictor_s.feature_importances_
    else:
        print "Dataset: "
        print dMethods['randomForest2']['predictor_s'].feature_importances_


    print("------------------------ On-top predictor -----------------------")
    # Classifiers to be ignored:
    #ignore = ['randomForest2', 'randomForest']
    ignore = []
    clf_onTop = 'randomForest'
    parameters = {}#{'C': 0.5, 'kernel': 'rbf', 'degree': 3, 'gamma': 0.0,
                 # 'coef0': 0.0, 'shrinking':True, 'probability':True,
                 # 'tol': 0.001, 'cache_size': 200, 'class_weight': None}


    print ("We will use an 'on-top' predictor on %i classifiers using a %s.") \
            %(len(dMethods.keys())-len(ignore), clf_onTop)

    final_prediction_s, dOnTop = onTopClassifier.SL_classification(dMethods,
                                        valid_s, train_s,
                                        ignore = ignore,
                                        method= clf_onTop, parameters= parameters)

    print("-------------------------- Tresholding -------------------------")
    ### ON THE 'ON-TOP' CLASSIFIER:
    # Create the elected vectors for each group (best AMS score)
    OT_best_yPredicted_s = [np.zeros(valid_s[2][i].shape[0]) for i in range(8)]
    OT_best_yProba_s = [np.zeros(valid_s[2][i].shape[0]) for i in range(8)]
    OT_best_AMS_s = [0. for i in range(8)]
    OT_best_method_s = [0 for i in range(8)]
    OT_best_ratio_s = [0 for i in range(8)]
    OT_best_sum_s_s = [0 for i in range(8)]
    OT_best_sum_b_s =  [0 for i in range(8)]
    OT_best_method = "On-top"

    OT_yProba_s = dOnTop['yProba_s']
    OT_yPredicted_s = dOnTop['yPredicted_s']

    #Let's concatenate the vectors
    OT_yProba_conca = preTreatment.concatenate_vectors(OT_yProba_s)
    OT_yPredicted_conca = preTreatment.concatenate_vectors(OT_yPredicted_s)

    # Best treshold global
    OT_best_ratio = tresholding.best_treshold(OT_yProba_conca, yValid_conca,
                                                 weights_conca)
    OT_yPredicted_treshold = tresholding.get_yPredicted_treshold(OT_yProba_conca,
                                                                 OT_best_ratio)

    OT_s, OT_b = submission.get_s_b(OT_yPredicted_treshold, yValid_conca,
                                    weights_conca)
    OT_s *= 10
    OT_b *= 10
    OT_best_AMS = hbc.AMS(OT_s,OT_b)


    # COMPARISON BEST TRESHOLD IN DMETHOD
    # FOR EACH METHOD:
    best_yPredicted_s = [np.zeros(valid_s[2][i].shape[0]) for i in range(8)]
    best_yProba_s = [np.zeros(valid_s[2][i].shape[0]) for i in range(8)]
    best_AMS_s = [0. for i in range(8)]
    best_method_s = [0 for i in range(8)]
    best_ratio_s = [0 for i in range(8)]
    best_AMS_1_method = 0.
    best_method = "methode"
    best_ratio = "0."


    for method in dMethods:

        yProba_s = dMethods[method]['yProba_s']
        yPredicted_s = dMethods[method]['yPredicted_s']

        #Let's concatenate the vectors
        yProba_conca = preTreatment.concatenate_vectors(yProba_s)
        yPredicted_conca = preTreatment.concatenate_vectors(yPredicted_s)

        # Best treshold global
        best_treshold = tresholding.best_treshold(yProba_conca, yValid_conca, weights_conca)
        yPredicted_treshold = tresholding.get_yPredicted_treshold(yProba_conca, best_treshold)

        s, b = submission.get_s_b(yPredicted_treshold, yValid_conca, weights_conca)
        s *= 10
        b *= 10
        ams = hbc.AMS(s,b)
        if ams > best_AMS_1_method:
            best_AMS_1_method = ams
            best_method = str(method)
            best_ratio = best_treshold


    # Let's concatenate the 8 vectors which perform the best on each of the
    # subgroups and threshold the result
    best_yPredicted_conca = preTreatment.concatenate_vectors(best_yPredicted_s)
    best_treshold_conca = tresholding.best_treshold(best_yPredicted_conca, yValid_conca, weights_conca)
    best_yPredicted_conca_treshold = tresholding.get_yPredicted_treshold(best_yPredicted_conca, best_treshold_conca)

    best_final_s, best_final_b, best_s_s, best_b_s = submission.get_s_b_8(best_yPredicted_s, valid_s[2], valid_s[3])
    best_s_treshold, best_b_treshold = submission.get_s_b(best_yPredicted_conca_treshold, yValid_conca, weights_conca)

    best_final_s *= 10
    best_final_b *= 10
    best_s_treshold *= 10
    best_b_treshold *= 10
    best_AMS = hbc.AMS(best_final_s, best_final_b)
    best_AMS_treshold = hbc.AMS(best_s_treshold, best_b_treshold)


    print "Best AMS using one of the methods : %f" %best_AMS_1_method
    print "    method : %s" %(str(method))
    print "    ratio : %f" %(best_ratio)
    print " "
    print "Best AMS concatenate: %f" %best_AMS
    print "Best AMS concatenate  after final tresholding : %f" %best_AMS_treshold
    print "best ratio on the concatenated vector : %f" %best_treshold_conca
    print " "
    print "Best AMS on-top : %f" %OT_best_AMS
    print "Best ratio on the concatenated vector : %f" %OT_best_ratio
    print " "



    """
    # Best treshold group by group
    for i in range(8):
        OT_best_treshold_s = tresholding.best_treshold(OT_yProba_s[i],
                                                       valid_s[2][i],
                                                       valid_s[3][i])

        OT_yPredicted_s[i] = tresholding.get_yPredicted_treshold(OT_yProba_s[i],
                                                              OT_best_treshold_s)

        s, b = submission.get_s_b(OT_yPredicted_s[i], valid_s[2][i],
                                  valid_s[3][i])

        s *= 250000/yPredicted_s[i].shape[0]
        b *= 250000/yPredicted_s[i].shape[0]

        ams = hbc.AMS(s,b)
        if ams > best_AMS_s[i]:
            best_yPredicted_s[i] = yPredicted_s[i]
            best_yProba_s[i] = yProba_s[i]
            best_AMS_s[i] = ams
            best_method_s[i] = dOnTop['method']
            best_ratio_s[i] = best_treshold
            best_sum_s_s[i] = s
            best_sum_b_s[i] =  b

    for n in range(8):
        print "Best AMS group %i: %f - method %s - ratio %f" \
                %(n, best_AMS_s[n], best_method_s[n], best_ratio_s[n])

    print "Best AMS : %f" %best_AMS_1_method
    print "    ratio : %f" %(best_ratio)
    print " "
    """



    """
    ##############
    # SUBMISSION #
    ##############
    print("-------------------------- Submission ---------------------------")

    # Prediction on the test set:
    # method used for the submission
    # TODO: check that the method name has the correct form
    # (build a list of valid method names)

    #method = "randomForest"

    #test_prediction_s, test_proba_s = eval(method).get_test_prediction(
    #                                            dMethods[method]['predictor_s'],
    #                                            test_s[1])

    test_prediction_s, test_proba_s = onTopClassifier.get_SL_test_prediction(
                                                dMethods, dOnTop, test_s[1])


    print("Test subsets signal average:")
    test_s_average = preTreatment.ratio_sig_per_dataset(test_prediction_s)
    print(" ")

    #RankOrder = np.arange(1,550001)

    if type(test_prediction_s) == list:
        test_prediction_s = np.concatenate(test_prediction_s)
        test_proba_s = np.concatenate(test_proba_s)
        RankOrder = onTopClassifier.rank_signals(test_proba_s)
        ID = np.concatenate(test_s[0])
    else:
        ID = test_s[0]

    # Create a submission file:
    sub = submission.print_submission(ID, RankOrder , test_prediction_s)

    return sub
    """
    return 0
Exemple #52
0
def _addalyse(solr_server, username, since_id=0, remake_profile=True, update_count=1):

    th = TwitterHelp()
    
    # does not use a Twitter API call
    if not th.twitter_contains(username):
        raise AddalyseUserNotOnTwitterError("Couldn't find any trace of '" + username + "'")

    username = th.get_screen_name(username)  # canonicalize the name (in the future, th.twitter_contains() might just return the canonical form)
    
    # solr_server can now optionally be a StorageHandler object
    sh = solr_server if isinstance(solr_server, StorageHandler) else StorageHandler(solr_server)

    # remake if not in Solr
    remake_profile = remake_profile or not sh.contains(username)
    
    if remake_profile:
        # get all tweeets from Twitter API 
        tweets = th.get_all_statuses(username)
        if not tweets: 
            e = AddalyseUnableToProcureTweetsError("I couldn't for the love of me extract some tweets for '" +
                                                   username +
                                                   "'. Maybe they just doesn't have any?")
            e.remake_profile = True
            raise e
        
        # latest tweet is first in lists
        new_since_id = tweets[0].id  # assumes that the latest tweet is first in the list
        
        # send to analysis
        print "addalyse(remake_profile=" + str(remake_profile) + "): analyzing, '" + username + "'"
        (lovekeywords, hatekeywords) = filter_analysis(analyse(map(lambda x: x.GetText(), tweets)))
        
        # store result in sunburnt
        print "addalyse(remake_profile=" + str(remake_profile) + "): adding, '" + username + "'"
        sh.add_profile(username, lovekeywords, hatekeywords, new_since_id, update_count)
        print "addalyse(remake_profile=" + str(remake_profile) + "): done"
        
    else:
        tweets = th.get_all_statuses(username, since_id) # get all tweets since since_id
        if not tweets:
            e = AddalyseUnableToProcureTweetsError("I couldn't for the love of me extract some tweets for '" +
                                                   username +
                                                   "'. Maybe they just doesn't have any new ones?")
            e.remake_profile = False
            raise e
           
        new_since_id = tweets[0].id
        
        # MERGING

        # send to analysis
        print "addalyse(remake_profile=" + str(remake_profile) + "): analyzing, '" + username + "'"
        (lovekeywords, hatekeywords) = analyse(map(lambda x: x.GetText(), tweets)) # Don't filter the new analysis just yet, merge it first!
        
        # get a users old hatekeywords_list and lovekeywords_list
        doc = sh.get_user_documents(username, 'lovekeywords_list', 'hatekeywords_list')[0]
        
        (lovekeywords_old, hatekeywords_old) = (doc.lovekeywords_pylist, doc.hatekeywords_pylist)
        
        # Merge tuples. Now that we are done merging we can start filtering out keywords with too low a weight.
        (lovemerge, hatemerge) = filter_analysis((merge_keywords(lovekeywords, lovekeywords_old), merge_keywords(hatekeywords, hatekeywords_old)))
        
        # add merged result to database
        print "addalyse(remake_profile=" + str(remake_profile) + "): adding, '" + username + "'"
        sh.add_profile(username, lovemerge, hatemerge, new_since_id, update_count)
        print "addalyse(remake_profile=" + str(remake_profile) + "): done"
        
    # returns true if added to database   
    return True #TODO: should this return True?
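
merge_keywords() and filter_analysis() are used above but not defined in this snippet. Assuming the keyword lists are (keyword, weight) pairs, as the comment about keywords with too low a weight suggests, a minimal merge-and-filter sketch could look like this (the names, the pair structure and the weight threshold are all assumptions):

def merge_keywords(new_keywords, old_keywords):
    # Sum the weights of keywords present in both lists, keep the rest as-is.
    merged = dict(old_keywords)
    for keyword, weight in new_keywords:
        merged[keyword] = merged.get(keyword, 0) + weight
    return list(merged.items())

def filter_analysis(keyword_lists, min_weight=1):
    # Drop keywords whose accumulated weight falls below the (assumed) threshold.
    lovekeywords, hatekeywords = keyword_lists
    keep = lambda pairs: [(k, w) for (k, w) in pairs if w >= min_weight]
    return keep(lovekeywords), keep(hatekeywords)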
Exemple #53
0
print "Deleting the column..."

for i in range(8):
    for index_column in L_delete:
        xsTrain_s[i] = np.delete(xsTrain_s[i], np.s_[index_column],1)
        xsValid_s[i] = np.delete(xsValid_s[i], np.s_[index_column],1)
        xsTest_s[i] = np.delete(xsTest_s[i], np.s_[index_column],1)

print "Training each groups"

dMethods ={}

# NAIVE BAYES:
kwargs_bayes = {}
dMethods['naiveBayes'] =  analyse.analyse(train_s, valid_s, 'naiveBayes', kwargs_bayes)
# SVM
"""
kwargs_svm ={}
dMethods['svm'] = analyse.analyse(train_s, valid_s,'svm', kwargs_svm)
"""
# K NEIGHBORS
kwargs_kn = {'n_neighbors':50}
dMethods['kNeighbors'] = analyse.analyse(train_s, valid_s, 'kNeighbors', kwargs_kn)
# LDA
kwargs_lda = {}
dMethods['lda'] = analyse.analyse(train_s, valid_s, 'lda', kwargs_lda)
# QDA
kwargs_qda= {}
dMethods['qda'] = analyse.analyse(train_s, valid_s, 'qda', kwargs_qda)
# ADABOOST
def analysis(sentence):
    return analyse(sentence)