Example #1
def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):

    proc = Proc(target=_regex_findall, args=(redis_key, regex, item_content, r_set, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return []
        else:
            if r_set:
                all_items = r_serv_cache.smembers(redis_key)
            else:
                all_items = r_serv_cache.lrange(redis_key, 0, -1)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return all_items
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
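The _regex_findall worker spawned above is not shown on this page. A minimal sketch of what it presumably looks like, assuming it runs the regex in the child process and parks the matches in Redis under redis_key (the r_serv_cache handle and the set/list split mirror the wrapper):

import re

def _regex_findall(redis_key, regex, item_content, r_set):
    # hypothetical worker: find all matches and cache them for the parent process
    all_items = re.findall(regex, item_content)
    if not all_items:
        return
    if r_set:
        r_serv_cache.sadd(redis_key, *all_items)   # deduplicated set
    else:
        r_serv_cache.rpush(redis_key, *all_items)  # ordered list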
Example #2
def MultinomialNaiveBayesGridSearch_OLD():  
    # C=1 is best
    cs = 10.0**np.arange(-9,2,0.5)
    aucs = []
    for c in cs:
        clf = MultinomialNB(alpha=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - Multinomial Naive Bayes ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = MultinomialNB(alpha=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
    roc_auc = auc(fpr,tpr)  # AUC of the refitted best model
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best alpha = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - Multinomial Naive Bayes ROC Curve')
    myplt.show()
    return clf
Example #3
    def get_facebook_posts(self):
        for user_handle in self.user_handle_list:

            try:
                posts_info = self.graph.get_object(id=user_handle + "/posts")
                post_list = posts_info.get("data", [])

                for post in post_list:
                    facebook_post_obj = {}
                    facebook_post_obj["id"] = post.get("id", "")
                    facebook_post_obj["url"] = ""
                    facebook_post_obj["handle"] = user_handle
                    facebook_post_obj["content"] = post.get("message",
                                                            "").replace(
                                                                "'", "")
                    facebook_post_obj["timestamp"] = post.get(
                        "created_time", "")
                    facebook_post_obj["time_lookup"] = datetime.datetime.now(
                    ).strftime("%Y-%m-%d %H:%M:%S")
                    insert_in_facebook_post(self.conn, facebook_post_obj)
                    #print(facebook_post_obj["id"],":...inserted")
            except facebook.GraphAPIError as e:
                # handle the API-specific error before the generic one
                #print user_handle + "\t" + str(e)
                Statistics.add_facebook_error_sites(user_handle + "\t" +
                                                    str(e))
            except StandardError as e:
                #    print user_handle + "\t" + str(e)
                Statistics.add_facebook_error_sites(user_handle + "\t" +
                                                    str(e))
Example #4
    def open(self, ga_engine):
        """ Open the database connection

      :param ga_engine: the GA Engine

      .. versionchanged:: 0.6
         The method now receives the *ga_engine* parameter.
      """
        if self.mysqldbmod is None:
            logging.debug("Loading MySQLdb module...")
            self.mysqldbmod = Util.importSpecial("MySQLdb")

        logging.debug("Opening database, host=%s", self.host)
        self.connection = self.mysqldbmod.connect(host=self.host,
                                                  user=self.user,
                                                  passwd=self.passwd,
                                                  db=self.db,
                                                  port=self.port)
        temp_stats = Statistics.Statistics()
        self.createStructure(temp_stats)

        if self.resetDB:
            self.resetStructure(Statistics.Statistics())

        if self.resetIdentify:
            self.resetTableIdentify()
Example #5
    def reportMeshLoadingFinished(self):
        print "Loading mesh from file finished..."
        print "Nodes: " + str(len(self.pslg.points))
        print "Segments: " + str(len(self.pslg.segments))
        print "Elements: " + str(len(self.pslg.elements))
        print ""

        print "Omega elements: " + str(len(self.parameters.omega))
        print "Omega area: " + str(self.parameters.omegaArea)
        print "Omega three elements: " + str(len(self.parameters.omegaThree))
        print "Omega three area: " + str(self.parameters.omegaThreeArea)
        print "Omega d segments: " + str(len(self.parameters.omegaD))
        print "Omega d length: " + str(self.parameters.omegaDLength)
        print ""

        print "Mesh statistics..."
        angleMinMax = Statistics.ComputeElementAngleRange(self.parameters)
        print "Element angle min: " + str(angleMinMax[0])
        print "Element angle max: " + str(angleMinMax[1])
        segmentLengthMinMax = Statistics.ComputeSegmentLengthRange(
            self.parameters)
        print "Segment length min: " + str(segmentLengthMinMax[0][0])
        print "Min segment: " + str(segmentLengthMinMax[0][1]) + " -> " + str(
            segmentLengthMinMax[0][2])
        print "Segment length max: " + str(segmentLengthMinMax[1][0])
        print "Max segment: " + str(segmentLengthMinMax[1][1]) + " -> " + str(
            segmentLengthMinMax[1][2])
        print ""
Example #6
def getTodayTest():
    params = request.get_json()
    try:
        ticket = params['tic']
    except (KeyError, TypeError):
        return {'status': 'bad', 'reason': 'No tic field is found'}
    try:
        with open('data/tmp_price_%s.pkl' % ticket, 'rb') as f:
            price = pickle.load(f)
    except (OSError, pickle.PickleError):
        # sym = SymbolHistory(ticket, StockAPI.getPriceHistory(ticket, 365*2+50))
        price = StockAPI.getPriceHistory(ticket, 200)
        with open('data/tmp_price_%s.pkl' % ticket, 'wb') as f:
            pickle.dump(price, f)
    sym = SymbolHistory(ticket, price=price)
    df = pd.DataFrame(sym.ohcl)
    df['volumn'] = sym.volumn
    df['time'] = sym.time

    avg = df.close.rolling(window=10).mean()
    pf = Statistics.polyfit(df.close, 1, errAccept=0.008, avg=avg)
    pf2 = Statistics.polyfit(df.close, 2, errAccept=0.01, avg=avg)
    df['fitval'] = pf.fitval
    df['fitCurve'] = pf2.fitval

    daily = Statistics.dailyStat(sym)
    df['volRsi'] = daily.volRsi
    df['buyVol'] = daily.buyVol
    df['sellVol'] = daily.sellVol
    df['unkVol'] = df.volumn - daily.sellVol - daily.buyVol
    df.loc[df.unkVol.isnull(), 'unkVol'] = df.volumn  # .loc avoids pandas chained-assignment pitfalls

    df = df.replace({np.nan: None})
    return {'status': 'ok', 'payload': df.to_dict(orient='records')}
Example #7
def LogisticGridSearch_OLD():  
    # C=1 is best
    cs = 10.0**np.arange(-1,2,0.25)   
    aucs = []
    for c in cs:
        clf = LogisticRegression(penalty='l1',C=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - Logistic Regression ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = LogisticRegression(penalty='l1',C=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
    roc_auc = auc(fpr,tpr)  # AUC of the refitted best model
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best C = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - Logistic Regression ROC Curve')
    myplt.show()
    return clf
Example #8
def GridSearch(data,params,classifier,classifier_name,paramname,probstype=1,clf_kwargs=None):
    f_train,f_test,y_train,y_test = data
    # C=1 is best
    def getROC(clf,probstype):
        if probstype == 1:
            probs = clf.predict_proba(f_test)
            fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
        else:
            probs = clf.decision_function(f_test)
            fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
        return fpr,tpr
    aucs = []
    mykwargs = dict(clf_kwargs or {})
    for c in params:
        mykwargs[paramname] = c
        clf = classifier(**mykwargs).fit(f_train, y_train)
        fpr,tpr = getROC(clf,probstype)
        roc_auc = auc(fpr,tpr)
        #cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search: '+classifier_name+' ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(params)):
        if aucs[i] > aucs[best]:
            best = i
    c = params[best]
    mykwargs[paramname] = c
    clf = classifier(**mykwargs).fit(f_train, y_train)
    fpr,tpr = getROC(clf,probstype)
    roc_auc = auc(fpr,tpr)  # AUC of the refitted best model
    myplt = st.plotROC(fpr,tpr,roc_auc,
                    legendlabel='Best '+paramname+' = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search: '+classifier_name+' ROC Curve')
    myplt.show()

    maxAUC = aucs[best]
    cs = params
    optC = params[best]
    
    plt.figure()
    maxauclabel = ("Max AUC = %0.2f, " % maxAUC) + paramname + (" = %s" % optC)
    plt.semilogx(cs,np.ones(len(cs))*maxAUC,'r',label=maxauclabel,linewidth=2,zorder=10)
    plt.semilogx(cs,aucs,zorder=1)
    plt.title('Grid Search: '+classifier_name+' AUC Scores')
    plt.xlabel(paramname)
    plt.ylabel('AUC Score')
    plt.legend(loc="lower right")
    #plt.legend(loc='lower left', bbox_to_anchor=(1, 0),
    #          ncol=1, fancybox=True, shadow=False)
    plt.show()
    
    return clf
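A usage sketch for the generic GridSearch above, assuming the same f_train/f_test/y_train/y_test arrays and imports (np, LogisticRegression) as the surrounding examples; the grid here is illustrative:

# Sweep LogisticRegression's C on a log grid, scoring ROC curves via predict_proba.
best_clf = GridSearch(data=(f_train, f_test, y_train, y_test),
                      params=10.0**np.arange(-3, 3, 0.5),
                      classifier=LogisticRegression,
                      classifier_name='Logistic Regression',
                      paramname='C',
                      probstype=1,
                      clf_kwargs={'penalty': 'l1'})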
Example #9
def SGDGridSearch_OLD():  
    # C=1 is best
    cs = 10.0**np.arange(-9,9,1)   
    aucs = []
    for c in cs:
        clf = SGDClassifier(penalty='l1',alpha=c).fit(f_train, y_train)
        probs = clf.decision_function(f_test)
        fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
        roc_auc = auc(fpr,tpr)
        cstr = '%0.2e'%c
        myplt = st.plotROC(fpr,tpr,roc_auc,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=False,
                    title='Grid Search - SGD Classifier ROC Curve')
        aucs.append(roc_auc)
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    clf = SGDClassifier(penalty='l1',alpha=c).fit(f_train, y_train)
    probs = clf.decision_function(f_test)
    fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs)
    roc_auc = auc(fpr,tpr)  # AUC of the refitted best model
    myplt = st.plotROC(fpr,tpr,roc_auc,
                legendlabel='Best alpha = %0.2e' % c,
                    figure=False,
                    show=False,
                    returnplt=True,
                    showlegend=True,
                    title='Grid Search - SGD Classifier ROC Curve')
    myplt.show()
    return clf, aucs
Example #10
def proposition_3():
    print()
    print(" ########## Proposition 3: ##########")
    s1 = Scenario(excel=True,
                  excelName='./Scenarios_Propositions.xlsx',
                  id="P3_1")
    s2 = Scenario(excel=True,
                  excelName='./Scenarios_Propositions.xlsx',
                  id="P3_2")
    s3 = Scenario(excel=True,
                  excelName='./Scenarios_Propositions.xlsx',
                  id="P3_3")
    data1 = s1.simulate()
    data2 = s2.simulate()
    data3 = s3.simulate()
    data1.plotMeanRPS(save=True)
    data2.plotMeanRPS(save=True)
    data3.plotMeanRPS(save=True)
    data1.computeData()
    data2.computeData()
    data3.computeData()
    print("UWM1 <-> UWM3 " +
          stats.getFullStats(data1.uwm, data3.uwm, inter=True))
    print("CWM1 <-> CWM3 " +
          stats.getFullStats(data1.cwm, data3.cwm, inter=True))
Example #11
def married_women_vs_unmarried(data):
    """
    print summary statistics of  earnings by married women vs. unmarried women
    :param data:
    :return: None
    """

    women_data = filter_by_features(data, 'female', {1})[0]

    married_women_data, unmarried_women_data = filter_by_features(
        women_data, 'marital', {1, 2, 3})
    population_to_data = {
        'Married Women': married_women_data,
        'Unmarried Women': unmarried_women_data
    }

    statistic_functions = [Statistics.mean, Statistics.median]

    education_scopes = [(0, 10), (11, 20)]

    order_of_print = ['Married Women', 'Unmarried Women']

    print('Question 2:')
    for scope in education_scopes:
        print("If {0}<=Y<={1}, then:".format(scope[0], scope[1]))
        for population in order_of_print:
            Statistics.population_statistics(population,
                                             population_to_data[population],
                                             'education', 'earnings', scope[0],
                                             scope[1], statistic_functions)
Example #12
    def learn(self):
        self.train_losses = []
        self.validation_losses = None if self.validation_set is None else []
        self.validation_accuracies = None if self.validation_set is None else []

        xtrain = self.train_set
        ytrain = self.train_labels

        for epoch in range(Params.MAX_EPOCH):

            loss = Statistics.MSELoss()

            for x, y in zip(xtrain, ytrain):
                o = [np.random.normal(0, 1) for _ in range(self.outputDim)]
                loss.update(o, y)

            self.train_losses.append(loss.get())

            if self.validation_set is not None:
                loss = Statistics.MSELoss()
                accuracy = Statistics.MEELoss()
                for x, y in zip(self.validation_set, self.validation_labels):
                    o = [np.random.normal(0, 1) for _ in range(self.outputDim)]
                    loss.update(o, y)
                    accuracy.update(o, y)
                self.validation_losses.append(loss.get())
                self.validation_accuracies.append(accuracy.get())
Example #13
def binning_prof (raw, knum=200, NBmin=100, plot=True):
  'Do binning analysis.\
   knum: number of bin lengths to be simulated. (k=bin size)\
   NBmin: minimum number of bins.\
   Return: [uncorrelated_data(NBmin)], [bin_size(knum)], [auto_correlation_time(knum)]'
  uncorr_data, ks, corrtime, err = [], [], [], []
  # Calculate kmax and kmin; the bin length grows as n*kmin, n=1,2,...
  # (integer division keeps the bin sizes integral)
  kmax = len(raw) // NBmin
  if kmax == 0: kmax = 1
  kmin = kmax // knum
  if kmin == 0: kmin = 1
  knum = kmax // kmin
  # Merge the bins with length k=kmin
  basedata = _merge_bin (kmin, raw)
  # Get "_ks" and "_corrtime". "_uncorr_data" remains with the largest bin-length.
  var0 = st.var (raw)
  for q in range(1, knum+1):
    uncorr_data = _merge_bin (q, basedata)
    k, var = q*kmin, st.var (uncorr_data)
    err.append ([k,st.err(uncorr_data)])
    ks.append (k)
    corrtime.append (_auto_corr_time (k, var, var0))
  if plot:
    pl.plot (ks, corrtime, marker='.')
    pl.xlabel ('bin size')
    pl.ylabel ('auto correlation time')
    pl.show()
  return uncorr_data, ks, corrtime
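_auto_corr_time is referenced above but not defined on this page. A minimal sketch under the standard binning estimate, where the integrated autocorrelation time follows from the ratio of the binned variance to the raw variance:

def _auto_corr_time (k, var, var0):
  'Hypothetical sketch. k: bin size. var: variance of the binned data. var0: raw variance.'
  # tau ~ 0.5 * (k * var_k / var_0 - 1); it levels off once bins exceed the
  # correlation length, which is what the plot above looks for.
  return 0.5 * (k * var / var0 - 1.0)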
Example #14
 def setup_display(self):
     self.FGStats.statsTable.insertRow(0)
     self.FGStats.statsTable.setItem(0, 0,
                                     QtWidgets.QTableWidgetItem(self.name))
     self.FGStats.statsTable.setItem(
         0, 1, QtWidgets.QTableWidgetItem(self.number))
     self.FGStats.statsTable.setItem(
         0, 2, QtWidgets.QTableWidgetItem(self.section))
     self.FGStats.statsTable.setItem(
         0, 3, QtWidgets.QTableWidgetItem(self.semester))
     self.FGStats.statsTable.setItem(
         0, 4,
         QtWidgets.QTableWidgetItem(
             str(Statistics.calculate_mean(self.studentGrades))))
     self.FGStats.statsTable.setItem(
         0, 5,
         QtWidgets.QTableWidgetItem(
             str(Statistics.calculate_median(self.studentGrades))))
     self.FGStats.statsTable.setItem(
         0, 6,
         QtWidgets.QTableWidgetItem(
             str(Statistics.calculate_mode(self.studentGrades))))
     self.FGStats.statsTable.setItem(
         0, 7,
         QtWidgets.QTableWidgetItem(
             str(Statistics.calculate_std_dev(self.studentGrades))))
     self.FGStats.statsTable.resizeColumnsToContents()
Example #15
def random_spherical_samples(pipeline):
    """
		generates samples from the tumor according to the specs
	"""
    pipeline.tumor.cells
    samples = []
    if not pipeline.tumor:
        raise Exception('Tumor doesnt exist')
    sampler = SphericalSampler(pipeline.tumor)
    # rand_coordinate = generate_coordinate( np.max( sampler.cell_positions ), np.min( sampler.cell_positions ) )
    rand_coordinate = sample_coordinate(sampler.cell_positions, deviate=True)

    pipeline.print2('Begining Tumor Sampling')
    for radius in pipeline.specs['RADII']:
        # generate 3 random coodinate
        pipeline.print2('Sampling radius:' + str(radius))
        for i in xrange(pipeline.specs.get('repeats', 25)):
            # generate a new coordinate
            # centre = ( rand_coordinate.next() , rand_coordinate.next() , rand_coordinate.next() )
            centre = rand_coordinate.next()

            # conduct the sample
            sample = sampler.sample(radius=radius,
                                    centre=centre,
                                    with_genotypes=True)

            print Statistics.get_drivers_only(sample[1], sampler.tumor.drivers)

            # insert a tuple of (radius, center, sample)
            samples.append((radius, centre, sample))

        pipeline.print2(
            str(pipeline.specs.get('repeats', 25)) + ' samples conducted')
    pipeline.print2('Sampling completed')
    pipeline.samples = samples
Example #16
    def main(self, argv):
        dataCleaner = CleanData()
        statsGenerator = Statistics()
        dataJoiner = JoinData()

        df_clean_data = dataCleaner.getCleanData(argv[1]).cache()
        statsGenerator.generateStats(df_clean_data)
        dataJoiner.joinData(df_clean_data)
Example #17
def variability(x,y):
    sumxy=Statistics.sumXY(x,y)
    sumx=Statistics.Sum(x)
    sumy=Statistics.Sum(y)
    size=Statistics.size(x)

    sxy=sumxy-(sumx*sumy)/size

    return sxy
Example #18
def totalVariability(x):

    size=Statistics.size(x)
    sumsquare=Statistics.sumSquares(x)
    sumx=Statistics.Sum(x)

    totalvariability=sumsquare-(math.pow(sumx,2)/size)

    return totalvariability
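Together, variability (Sxy) and totalVariability (Sxx) are the textbook ingredients of simple linear regression. A small sketch of how they would combine, assuming the same Statistics helpers used above:

def regression_slope(x, y):
    # least-squares slope: b1 = Sxy / Sxx
    return variability(x, y) / totalVariability(x)

def regression_intercept(x, y):
    # b0 = mean(y) - b1 * mean(x)
    size = Statistics.size(x)
    return Statistics.Sum(y) / size - regression_slope(x, y) * Statistics.Sum(x) / size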
Example #19
def checkMoneyFlow(symbol: SymbolHistory,
                   lookback=20,
                   intras=None,
                   halfsess=False):
    # dates = symbol.time.iloc[-period:].reset_index(drop=True)

    rsi = symbol.rsi().iloc[-lookback:].reset_index(drop=True)
    priceChg = symbol.close.diff().iloc[-lookback:].reset_index(drop=True)
    date = pd.to_datetime(symbol.time.iloc[-lookback:].reset_index(drop=True),
                          unit='s')
    dailyStat = Statistics.dailyStat(symbol, lookback=lookback)
    movingDir = Statistics.getMovingDirection(symbol.close.iloc[-lookback -
                                                                10:])

    signal = []
    for i in range(5, lookback):
        p = dailyStat.iloc[i]
        s = ''
        if priceChg[i] < 0 and p['volRsiChg'] >= 20:
            s += 'VolRsiBigIncWhenPriceDrop '
        if priceChg[i] < 0 and p['volRsiChg'] > 8 and p['volRsi'] > 60:
            s += 'VolRsiRecoverWhenPriceDrop '
        if p['volRsiChg'] >= 15 and p['volRsi'] > 50:
            s += 'VolRsiBigInc>50 '
        if all([
                p['volRsiChg'] > 0, dailyStat.iloc[i - 1]['volRsiChg'] > 0,
                priceChg[i] < 0, priceChg[i - 1] < 0
        ]):
            s += 'VolRsiPosDiver '
        if p['volRsiChg'] - dailyStat.iloc[i - 1]['volRsiChg'] >= 20:
            s += 'VolRsiBigSwing '

        # if p['volRsi'] > 95:
        #     incCnt = 0
        #     for j in range(5):
        #         if dailyStat.loc[i-j]['volRsiChg'] > 0: incCnt += 1
        #     if incCnt > 3:
        #         s += 'IncrHighVolRSI '

        # if i > 10:
        #     incCnt = 0
        #     for j in range(10):
        #             if dailyStat.loc[i-j]['volRsiChg'] > 0: incCnt += 1
        #     if incCnt > 8:
        #         s += 'VolRSILongStreakInc '
        # if rsi.loc[i-1] < 30 and rsi.loc[i-1] < rsi.loc[i] and p['volRsiChg'] > 0:
        #     s += 'RSI<30ButRecover '

        # if p['volRsi'] > 85:
        #     s += 'VolRsi>85'
        if s != '':
            s = '%s : %s' % (date[i].strftime('%Y/%m/%d'), s)
        signal.append(s)

    return signal, dailyStat
Example #20
def statistics_run(data, y_predicted, y_test, arguments):
    # Create statistics object
    statistics = Statistics(data)

    # Plot the genre-distribution
    if "plot" in arguments:
        print("Plotting genre distribution")
        statistics.plot_frequency_of_genres()

    print("\n****RESULTS****\n")

    # Compare predicted values with true-values (accuracy)
    for classifier, predicted_value in y_predicted.items():
        print("Accuracy " + classifier + ":")
        print(
            str(round(100 *
                      metrics.accuracy_score(y_test, predicted_value), 2)) +
            "%\n")

    if "kpi" in arguments:
        statistics.get_KPI_for_each_genre(y_predicted, y_test)

    # Create and visualize confusion matrix
    if "cm" in arguments:
        statistics.calculate_and_plot_confusion_matrix(y_test, y_predicted)
Example #21
    def setup_display(self):
        row_count = self.gradesheetTable.rowCount()
        col_count = self.gradesheetTable.columnCount() - 2

        student_grades = []
        #Loop through our grade table
        for col in range(1, col_count):
            row_insert = self.AStats.statsTable.rowCount()
            self.AStats.statsTable.insertRow(row_insert)
            assignment_grades = []
            assignment_name = self.gradesheetTable.horizontalHeaderItem(
                col).get_assignment_name()
            assignment_points = self.gradesheetTable.horizontalHeaderItem(
                col).get_assignment_points()

            self.AStats.statsTable.setItem(
                row_insert, 0, QtWidgets.QTableWidgetItem(assignment_name))
            self.AStats.statsTable.setItem(
                row_insert, 1, QtWidgets.QTableWidgetItem(assignment_points))

            assignment_grades.append(assignment_name)
            assignment_grades.append(assignment_points)
            assignment_grades.append({})
            for row in range(1, row_count):
                student_id = self.gradesheetTable.verticalHeaderItem(
                    row).get_student_uuid()
                grade = self.gradesheetTable.item(row, col).text()
                if grade == "" or grade == "-":
                    grade = 0
                assignment_grades[2][student_id] = float(grade)

            student_grades.append(assignment_grades)

        for counter, assignment in enumerate(student_grades):
            mean = Statistics.calculate_mean(assignment[2].values())
            median = Statistics.calculate_median(assignment[2].values())
            mode = Statistics.calculate_mode(assignment[2].values())
            std_dev = Statistics.calculate_std_dev(assignment[2].values())
            self.AStats.statsTable.setItem(
                counter, 2, QtWidgets.QTableWidgetItem(str(mean)))
            self.AStats.statsTable.setItem(
                counter, 3, QtWidgets.QTableWidgetItem(str(median)))
            self.AStats.statsTable.setItem(
                counter, 4, QtWidgets.QTableWidgetItem(str(mode)))
            self.AStats.statsTable.setItem(
                counter, 5, QtWidgets.QTableWidgetItem(str(std_dev)))

        self.AStats.statsTable.resizeColumnsToContents()
Example #22
def BiasScanAnalysis():
    dictOfBiasScanFiles = GetBiasResultFiles()
    numberOfTestedBoards = len(dictOfBiasScanFiles)
    for PowerBoardID in dictOfBiasScanFiles:
        #if not PowerBoardID == str(6):
        #    continue
        print "PowerBoardID " + str(PowerBoardID)
        for PowerUnitID in dictOfBiasScanFiles[PowerBoardID]:
            print "PowerUnitID " + str(PowerUnitID)
            for Load in dictOfBiasScanFiles[PowerBoardID][PowerUnitID]:
                bsData = st.BiasScan()
                bsData.readFile(
                    resultsFolder +
                    dictOfBiasScanFiles[PowerBoardID][PowerUnitID][Load])
                vint, vslope, iint, islope = bsData.visualizeAndCheck()
                dictOfBiasScanFiles[PowerBoardID][PowerUnitID][Load] = iint
                if Load == "High":
                    print "Offset " + str(iint)

        if dictOfBiasScanFiles[PowerBoardID]["Right"][
                "High"] > -0.0075 and dictOfBiasScanFiles[PowerBoardID][
                    "Left"]["High"] > -0.0075:
            print "Grade for this power board is: Inner Layers grade"
        else:
            print "Grade for this power board is: Outer Layers grade"
Example #23
def stats(airports, translator):
    stats = Statistics.Statistics(airports, translator)

    # Loop through the options until the user selects 'q'
    while (True):

        printStatsMenu()
        userInput = raw_input()

        if (userInput == "1"):
            print(stats.getLongestFlight())
        elif (userInput == "2"):
            print(stats.getShortestFlight())
        elif (userInput == "3"):
            print(stats.getAverageFlightDistance())
        elif (userInput == "4"):
            print(stats.getLargestCity())
        elif (userInput == "5"):
            print(stats.getSmallestCity())
        elif (userInput == "6"):
            print(stats.getAverageCitySize())
        elif (userInput == "7"):
            print(stats.getContinentsList())
        elif (userInput == "8"):
            print(stats.getHubCity())
        elif (userInput == "q"):
            return
Example #24
 def linurit4(self):
     stats = st.statistics()
     print(stats.getAvIncr(self.dfUtlendingarGisti))
     print(stats.getAvIncrMonth(self.dfUtlendingarGisti, 3))
     stats.plotAll('Útlendingar gistinætur',
                   self.dfUtlendingarGisti,
                   months=[8, 9, 10, 11])
Example #25
def _merge_bin (k, dat):
    'Binning the data by length k. Return: a shorter data'
    shortdata, i = [], 0
    while (i+k <= len(dat)):
      shortdata.append (st.msum (dat[i:i+k])/float(k))
      i += k
    return shortdata
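A short usage sketch for _merge_bin, assuming st.msum behaves like a plain sum:

raw = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
binned = _merge_bin (2, raw)   # -> [1.5, 3.5, 5.5, 7.5, 9.5]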
Example #26
def writeTSV(inPath, outPath):
    if not exists(outPath):
        makedirs(outPath)
    captions = ['link', 'argument_id', 'reply_id', 'type', 'question', 'content', 'source']
    for data, debate_id, section in Statistics.dataFiles(inPath):
        if len(data['arguments']['pro']) + len(data['arguments']['contra']) >= 100:  # assumption: the opposing side's key is 'contra'; the original counted 'pro' twice
            records = []
            for arg in argument_row(data):
                # arg['debate_id'] = debate_id
                arg['link'] = "https://www.openpetition.de/petition/argumente/" + debate_id
                records.append(arg)

            with open(join(outPath, section + '_' + debate_id + '.tsv'), 'w') as tsvFile:
                writer = Statistics.UnicodeDictWriter(tsvFile, fieldnames=captions, delimiter='\t', lineterminator='\n')
                writer.writeheader()
                writer.writerows(records)
Example #27
def proposition_4():
    print()
    print("########## Proposition 4: ##########")
    results = []
    results_full = []
    s = Scenario(excel=True,
                 excelName='./Scenarios_Propositions.xlsx',
                 id="P4_1")
    experts = copy.deepcopy(s.experts)
    for e in experts:
        epsilon = np.zeros(len(e.epsilon))
        info_index = int(np.random.uniform(0, len(epsilon), 1))
        epsilon[info_index] = 1
        e.epsilon = epsilon
    for i in range(2, len(s.experts) + 1):
        s_temp = copy.deepcopy(s)
        s_temp.experts = []
        for j in range(0, i):
            s_temp.experts.append(copy.deepcopy(experts[j]))
        data_temp = s_temp.simulate()
        data_temp.computeData()
        results_full.append(data_temp.uwm)
        results.append(data_temp.getMeanRPS()[1][0])
    for i in range(0, len(results_full) - 1):
        print("UWM (" + str(i + 2) + " Exp) <-> UWM (" + str(i + 3) +
              " Exp) " + stats.getFullStats(
                  results_full[i], results_full[i + 1], inter=True))
    print(results)
Example #28
    def run(self):
        self.model_history = np.empty((self.n_periods, ), dtype=Model)
        for i in range(self.n_periods):
            sequence = self.ts[i:i + self.n_train]
            # first period fits from scratch; later periods warm-start from the previous fit
            if i == 0:
                self.__fit_model_to_sequence(sequence, None)
            else:
                self.__fit_model_to_sequence(
                    sequence, self.model_history[i - 1].params.to_array())
            self.model_history[i] = deepcopy(self.model)

            self.trade_logic.update_logic(self.model_history[i], sequence)

            if self._is_trade_open():
                self.trade_history[-1].add_position(
                    i, sequence[-1]
                )  # 'sequence' must be the log_return of the original TS !
                if self.trade_logic.close_trade():
                    self.trade_history[-1].close()
            else:
                if self.trade_logic.open_trade():
                    self.trade_history.append(
                        Trade(i + 1)
                    )  # '+1' since the trade is actually opened the next period
                    # Check if last 'Trade' is empty
        self.__compute_PnL()
        self.PnL_statistics = Statistics(self.PnL)
Example #29
    def get_description_information(self):
        for user_handle in self.user_handle_list:
            try:
                site_info = self.graph.get_object(
                    id=user_handle,
                    fields=
                    "id,name,fan_count,rating_count,website,overall_star_rating,username"
                )
                facebook_general_obj = {}
                facebook_general_obj["id"] = site_info.get("id", "")
                facebook_general_obj["handle"] = site_info.get("username", "")
                facebook_general_obj["name"] = site_info.get("name", "")
                if site_info.get("overall_star_rating", "") is "":
                    facebook_general_obj["rating"] = 0
                else:
                    facebook_general_obj["rating"] = site_info.get(
                        "overall_star_rating", "")

                if site_info.get("rating_count", "") is "":
                    facebook_general_obj["count_reviews"] = 0
                else:
                    facebook_general_obj["count_reviews"] = site_info.get(
                        "rating_count", "")

                if site_info.get("fan_count", "") is "":
                    facebook_general_obj["count_likes"] = 0
                else:
                    facebook_general_obj["count_likes"] = site_info.get(
                        "fan_count", "")

                facebook_general_obj["count_followers"] = 0
                facebook_general_obj["time_lookup"] = datetime.datetime.now(
                ).strftime("%Y-%m-%d %H:%M:%S")
                facebook_general_obj[
                    "url"] = "https://www.facebook.com/" + site_info.get(
                        "username", "")
                insert_in_facebook_handle_info(self.conn, facebook_general_obj)

            except facebook.GraphAPIError as e:
                # handle the API-specific error before the generic one
                #print user_handle + "\t" + str(e)
                Statistics.add_facebook_error_sites(user_handle + "\t" +
                                                    str(e))
            except StandardError as e:
                #print user_handle + "\t" + str(e)
                Statistics.add_facebook_error_sites(user_handle + "\t" +
                                                    str(e))
        """
Example #30
	def stats(self):
		return Statistics._leaving_one_out("./toTrain/Malw","./toTrain/NoMalw",1)
			#DIR_TO_PREDICT = "C:\\Users\\EstebanMontesMorales\\Desktop\\wmdGUI\\wmdLocalGUI\\Predicts\\"


#print WMD().train()
#print WMD().predict()
#print WMD().stats()
Example #31
def MedianImageColor(InputImage, Ignore=None):
	InputImage = InputImage.convert('L')
	InputImagePixels = InputImage.load()

	# flatten the image so that we can take the median
	ColoredImage = PILToCV2(InputImage)
	GreyImage = cv2.cvtColor(ColoredImage, cv2.COLOR_BGR2GRAY)
	FlatImage = GreyImage.flatten()

	if Ignore is not None:
		PrunedImage = []
		for Item in FlatImage:
			if Item != Ignore:
				PrunedImage.append(Item)
		return Statistics.Median(PrunedImage)
	else:
		# take the median
		return Statistics.Median(FlatImage) 
Example #32
def calcRsi(tradingDays, index, numPeriods):
    if index == 0:
        tradingDays[index]['Gain'] = 0
        tradingDays[index]['Loss'] = 0
        tradingDays[index][f"{numPeriods}DayAvgGain"] = 0
        tradingDays[index][f"{numPeriods}DayAvgLoss"] = 0
        tradingDays[index][f"{numPeriods}DayRelativeStrength"] = 0
        tradingDays[index][f"{numPeriods}DayRSI"] = 0
    else:
        if tradingDays[index]['Close'] > tradingDays[index - 1]['Close']:
            tradingDays[index]['Gain'] = tradingDays[index]['Close'] - tradingDays[index - 1]['Close']
            tradingDays[index]['Loss'] = 0
        else:
            tradingDays[index]['Gain'] = 0
            tradingDays[index]['Loss'] = tradingDays[index - 1]['Close'] - tradingDays[index]['Close']
            
        if index < numPeriods:
            tradingDays[index][f"{numPeriods}DayAvgGain"] = 0
            tradingDays[index][f"{numPeriods}DayAvgLoss"] = 0
            tradingDays[index][f"{numPeriods}DayRelativeStrength"] = 0
            tradingDays[index][f"{numPeriods}DayRSI"] = 0
        elif index == numPeriods:
            tradingDays[index][f"{numPeriods}DayAvgGain"] = Statistics.findSimpleAverage(tradingDays, index - (numPeriods - 1), index, 'Gain')
            tradingDays[index][f"{numPeriods}DayAvgLoss"] = Statistics.findSimpleAverage(tradingDays, index - (numPeriods - 1), index, 'Loss')
            
            if tradingDays[index][f"{numPeriods}DayAvgLoss"] > 0:
                tradingDays[index][f"{numPeriods}DayRelativeStrength"] = tradingDays[index][f"{numPeriods}DayAvgGain"] / tradingDays[index][f"{numPeriods}DayAvgLoss"]
                tradingDays[index][f"{numPeriods}DayRSI"] = 100 - 100 / (1 + tradingDays[index][f"{numPeriods}DayRelativeStrength"])
            else:
                tradingDays[index][f"{numPeriods}DayRelativeStrength"] = 1000000
                tradingDays[index][f"{numPeriods}DayRSI"] = 100 - 100 / (1 + tradingDays[index][f"{numPeriods}DayRelativeStrength"])
        else:
            a = tradingDays[index - 1][f"{numPeriods}DayAvgGain"]
            b = tradingDays[index]['Gain']
            tradingDays[index][f"{numPeriods}DayAvgGain"] = ((numPeriods - 1) * a + b) / numPeriods
            tradingDays[index][f"{numPeriods}DayAvgLoss"] = ((numPeriods - 1) * tradingDays[index - 1][f"{numPeriods}DayAvgLoss"] + tradingDays[index]['Loss']) / numPeriods
    
            if tradingDays[index][f"{numPeriods}DayAvgLoss"] > 0:
                tradingDays[index][f"{numPeriods}DayRelativeStrength"] = tradingDays[index][f"{numPeriods}DayAvgGain"] / tradingDays[index][f"{numPeriods}DayAvgLoss"]
                tradingDays[index][f"{numPeriods}DayRSI"] = 100 - 100 / (1 + tradingDays[index][f"{numPeriods}DayRelativeStrength"])
            else:
                tradingDays[index][f"{numPeriods}DayRelativeStrength"] = 1000000
                tradingDays[index][f"{numPeriods}DayRSI"] = 100 - 100 / (1 + tradingDays[index][f"{numPeriods}DayRelativeStrength"])
Example #33
 def get_reddit_general(self):
     try:
         print str(strftime("%H:%M:%S", gmtime())) + ": Checking Reddit General!"
         for user_handle in self.user_handle_list:
             subreddit_obj = self.r.subreddit(user_handle)
             reddit_general_obj = {}
             reddit_general_obj["id"] = subreddit_obj.id
             reddit_general_obj["count_readers"] = subreddit_obj.subscribers
             reddit_general_obj["name"] = subreddit_obj.display_name
             reddit_general_obj["information"] = subreddit_obj.description.replace("'","")
             moderator_str = ""
             for moderator in subreddit_obj.moderator():
                 moderator_str += ","+str(moderator)
             reddit_general_obj["moderators"] = moderator_str
             reddit_general_obj["time_lookup"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
             reddit_general_obj["time_creation"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
             insert_in_reddit_handle_info(self.conn,reddit_general_obj)
             Statistics.inc_reddit_descriptions()
     except StandardError as e:
         Statistics.add_reddit_error_sites(user_handle + "\t" + str(e))
Example #34
def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            Statistics.incr_module_timeout_statistic(module_name)
            err_mess = "{}: processing timeout: {}".format(module_name, item_id)
            print(err_mess)
            publisher.info(err_mess)
            return None
        else:
            first_occ = r_serv_cache.get(redis_key)
            r_serv_cache.delete(redis_key)
            proc.terminate()
            return first_occ
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        proc.terminate()
        sys.exit(0)
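As with Example #1, the _regex_search worker is not shown here. A minimal sketch, assuming it caches only the first occurrence under redis_key for the parent to read back:

import re

def _regex_search(redis_key, regex, item_content):
    # hypothetical worker: stash the first match for the parent process
    first_occ = re.search(regex, item_content)
    if first_occ:
        r_serv_cache.set(redis_key, first_occ.group())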
Example #35
def BiasScanAnalysis():
    for load in loads:
        listOfPURBiasScanFiles = GetResultFiles(PowerUnitID="Right", load=load, test="BiasScan")
        numberOfTestedBoards = len(listOfPURBiasScanFiles)
        for bscanFile in listOfPURBiasScanFiles:
            bsData = st.BiasScan()
            bsData.readFile(bscanFile)
            vint, vslope, ivslope, iint, islope = bsData.visualizeAndCheck()
            resMeasured[load][0].append(ivslope[0])
        listOfPULBiasScanFiles = GetResultFiles(PowerUnitID="Left", load=load, test="BiasScan")
        for bscanFile in listOfPULBiasScanFiles:
            bsData = st.VoltageScan()
            bsData.readFile(bscanFile)
            vints, vslopes, ivslopes, iints, islopes = bsData.visualizeAndCheck()
            resMeasured[load][1].append(ivslopes[0])

        for i in range(2):
            resMean[load][i] = sum(resMeasured[load][i]) / len(resMeasured[load][i])

        for i in range(2):
            resSigma[load][i] = sum([(resMeasured[load][i][j] - resMean[load][i])**2
                                     for j in range(len(resMeasured[load][i]))]) / len(resMeasured[load][i])
Example #36
	def get_description_information(self):
		print str(strftime("%H:%M:%S", gmtime())) + ": Checking Description!"
		conn = open_connection()
		user_handle_list = get_twitter_urls(conn)
		error_sites = []
		try:
			for user_handle in user_handle_list:
				#print "Checking " + user_handle
				user = self.api.get_user(user_handle)
				twitter_general_obj = {}
				twitter_general_obj["join_date"] = user.created_at
				twitter_general_obj["description"] = user.description
				twitter_general_obj["handle"] = user.screen_name
				twitter_general_obj["name"] = user.name
				twitter_general_obj["tweetcount"] = user.statuses_count
				twitter_general_obj["followercount"] = user.followers_count
				twitter_general_obj["location"] = user.location
				twitter_general_obj["desc_link"] = user.url
				twitter_general_obj["time_lookup"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
				insert_in_handle_info(conn,twitter_general_obj)
				Statistics.inc_twitter_descriptions()
		except StandardError as e:
			Statistics.add_twitter_error_sites(user_handle + "\t" + str(e))
			#error_sites.append(user_handle)
		except tweepy.TweepError as e:
			#message = str(e.message[0]['code']) + "\n" + str(e.args[0][0]['code'])
			Statistics.add_twitter_error_sites(user_handle + "\t" + str(e.message))
		conn.close()
Example #37
    def get_reddit_posts(self):
        try:
            print str(strftime("%H:%M:%S", gmtime())) + ": Checking Reddit Posts!"
            for user_handle in self.user_handle_list:
                subreddit_obj = self.r.subreddit(user_handle)
                for submission in subreddit_obj.hot(limit=100):
                    epoch = datetime.datetime.utcfromtimestamp(int(submission.created_utc))
                    reddit_post_obj = {}
                    reddit_post_obj["url_post"] = submission.url
                    """ Bug needs to be fixed """
                    """if(submission.url == "https://www.sec.gov/news/press-release/2017-184"):
                        print subreddit_obj
                        break"""
                    reddit_post_obj["id"] = submission.id
                    # Check if username is available or deleted
                    hasName = getattr(submission,"name",None)
                    if hasName:
                        reddit_post_obj["name"] = submission.name    
                    else:
                        reddit_post_obj["name"] = "[deleted]"
                    
                    # Check if author is available or deleted
                    hasAuthor = getattr(submission, "author", None)
                    if hasAuthor:
                        reddit_post_obj["poster"] = submission.author.name
                    else: 
                        reddit_post_obj["poster"] = "[deleted]"
                    
                    reddit_post_obj["title"] = submission.title.replace("'","")
                    reddit_post_obj["url_comments"] = "https://www.reddit.com/r/"+user_handle+"/comments/"+submission.id
                    reddit_post_obj["time_lookup"] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    reddit_post_obj["timestamp"] = epoch.strftime("%Y-%m-%d %H:%M:%S")

                    insert_in_reddit_post(self.conn,reddit_post_obj)
                    Statistics.inc_reddit_post()
                    #print reddit_post_obj
                    # break
        except StandardError as e:
            Statistics.add_reddit_error_sites(user_handle + "\t" + str(e)+ " @ " + submission.url)
            #print "[get_reddit_posts()-ERROR at " + str(user_handle) + " @ " + submission.url + "\n"+str(e)
Example #38
def initGraphMod(N,k,niter,alpha=None):
    """
    Init graph, assets are drawn from a power law distribution, 
    other information is chosen at random in ranges specified in the paper
        N: number of nodes
        alpha: power law exponent, if None is drawn at random in [1.5,5.0]
        returns: list of nodes information sorted by decreasing assets
    """
    nodes = {}
    if alpha is None:
        alpha = np.random.uniform(1.5,5)
    sample = Statistics.powerlaw_sample(100, 10**10, alpha,N)

    for i in range(N):
        equity = np.random.uniform(0, 0.25)
        cash = np.random.uniform(0, 0.25)
        # node information
        nodes[i] = {
            'ASSET': sample[i],
            'EQUITY': equity,
            'DEPOSITS': np.random.uniform(0,1-equity),
            'CASH': cash,
            #'LOANS': np.random.uniform(k*(1-cash)/niter,(1-cash)),
            'LOANS': np.random.uniform(k*(1-cash)/niter,(k+1)*(1-cash)/niter),
#            'LOANS': np.random.uniform(0, 1-cash),
            # 0: False, 1: default, 2: failure, 3: exogenous
            'BANKRUPT': 0 
        }      
    # sorting
    sort = sorted(nodes.values(), key=lambda n: n['ASSET'], reverse=True)
    # nodes as dictionary
    nodes = {i: sort[i] for i in range(len(sort))}
    
    # undirected edges
    exp_degree = np.array([nodes[x]['ASSET'] for x in nodes])  # array, so the elementwise scaling below works
    exp_degree = exp_degree / max(exp_degree)
    exp_degree = exp_degree * N
    g = nx.expected_degree_graph(exp_degree,selfloops=False)
    # remove cycles
    #g = nx.bfs_tree(g,0)
    for i in g.nodes():
        g.node[i] = nodes[i]
    return g
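A short usage sketch, assuming Statistics.powerlaw_sample exists as called above and the old networkx 1.x g.node API:

g = initGraphMod(N=100, k=1, niter=10, alpha=2.5)
print(g.number_of_nodes(), g.number_of_edges())
print(g.node[0]['ASSET'])   # nodes are sorted by decreasing assets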
Example #39
		if len(argv)!=7: usage(); exit()
		train_malware_path     = argv[2]
		train_non_malware_path = argv[3]
		fprototypes = argv[4]
		f2wvmodel 	= argv[5]
		ffmodel		= argv[6]
		Classifier._fit(train_malware_path,train_non_malware_path,True,fprototypes,f2wvmodel,ffmodel)
	
	elif argv[1]=="--predict":
		if len(argv)!=7: usage(); exit()
		exe_file,k,path_prototypes,path_w2v_model,path_f_model = argv[2],int(argv[3]),argv[4],argv[5],argv[6]
		it = clock()
		c_class = Classifier._predict(exe_file,k,path_prototypes,path_w2v_model,path_f_model)
		report(exe_file,c_class,clock()-it)
			
	elif argv[1]=="--statistics":
		if len(argv)!=6: usage(); exit()
		type_statistics        = argv[2]
		k					   = int(argv[3])
		train_malware_path     = argv[4]
		train_non_malware_path = argv[5]
		if type_statistics=="-lou": Statistics._leaving_one_out(train_malware_path,train_non_malware_path,k)
		#elif type_statistics=="-bfc": Statistics._cross_validation(...)
		#elif type_statistics=="-hdo": Statistics._hold_out(...)
		#elif type_statistics=="-prt": Statistics._partition(...)
		#elif type_statistics=="-rst": Statistics._resubstitution(...)
		
	else: usage()


Example #40
 def median(self, list_in_values):
     return Statistics.median(list_in_values)
Example #41
	def residual(self,theta):
		return stat.norm(theta-self.theta)
Example #42
def binning (raw, k):
  uncorr = _merge_bin (k, raw)
  err = st.err(uncorr) * 2
  return st.mean (uncorr), err
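binning combines _merge_bin (Example #25) with a doubled standard error. A usage sketch, assuming st.mean and st.err are the sample mean and standard error ('measurements' is a hypothetical raw series):

mean, err = binning (measurements, 50)
print ('%f +/- %f' % (mean, err))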
Example #43
	def stats(self):
		return Statistics._leaving_one_out(MALW_PATH,NOMALW_PATH,1)
Example #44
    sel_bool_test = train == 0
    sel_ind_train = np.where(sel_bool_train)[0]
    sel_ind_test = np.where(sel_bool_test)[0]
    
    f_train = features[sel_ind_train]
    f_test = features[sel_ind_test]
    
    # N
    approved = 1-rejected
    y_train = np.array(approved[sel_bool_train]).astype(int)
    y_test = np.array(approved[sel_bool_test]).astype(int)
    
    return f_train,f_test,y_train,y_test

# CLASSIFIERS
'''
clf1 = MultinomialNB().fit(f_train, y_train)
probs = clf1.predict_proba(f_test)
fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
roc_auc = auc(fpr,tpr)
st.plotROC(fpr,tpr,roc_auc,"MultinomialNB")


clf2 = LogisticRegression(penalty='l1').fit(f_train, y_train)
probs = clf2.predict_proba(f_test)
fpr,tpr,_ = roc_curve(y_true=y_test,y_score=probs[:,1])
roc_auc = auc(fpr,tpr)
st.plotROC(fpr,tpr,roc_auc,"LogReg")


clf3 = SGDClassifier(penalty='l1').fit(f_train, y_train)