Example no. 1
def crawl(args):
    addr_in = args[0]
    clf = args[1]
    X_test, y_test = gd.get(addr_in)
    prediction = clf.predict(X_test)

    return metrics.confusion_matrix(y_test, prediction)
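
The examples in this collection call a shared Get_Data helper (imported as gd) whose implementation is not shown. Below is a minimal sketch of a compatible get function, assuming the per-day data is stored as a single NumPy array whose last column holds the label; the file name day_samp_bin.npy is borrowed from Example no. 3, and everything else (including the signature) is an assumption, not the original module.

import os
import numpy as np

def get(addr_day, ratio=None, features_to_get=None, mode=None):
    # Hypothetical loader: one .npy file per day, last column = label.
    data = np.load(os.path.join(addr_day, "day_samp_bin.npy"))
    X, y = data[:, :-1], data[:, -1]
    if features_to_get is not None:
        X = X[:, features_to_get]
    # ratio/mode (sub-sampling controls in the real module) are ignored in this sketch.
    return X, y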
Example no. 2
def crawl(args):
    addr_in = args[0]
    clf = args[1]

    X_test, y_test = gd.get(addr_day=addr_in, features_to_get=__FEATURES_TO_GET)
    prediction = clf.predict(X_test)

    return metrics.confusion_matrix(y_test, prediction)
Example no. 3
def test(addr_test, clf):
    path_in = os.path.join(addr_test, "day_samp_bin.npy")
    X_test, y_test = gd.get(addr_day=addr_test, features_to_get=__FEATURES_TO_GET)

    prediction = clf.predict(X_test)

    confusion_matrix = metrics.confusion_matrix(y_test, prediction)

    tp = confusion_matrix[1, 1]
    fp = confusion_matrix[0, 1]
    tn = confusion_matrix[0, 0]
    fn = confusion_matrix[1, 0]
    total = tp+fp+tn+fn
    recall = round(tp / float(tp+fn), 4)
    filtered = round(float(tn+fn) / total, 4)
    return [tn, fp, fn, tp], recall, filtered
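
The recall and filtered values above follow from scikit-learn's confusion-matrix layout (rows are true classes, columns are predicted classes, so with labels 0/1 the cells are [[tn, fp], [fn, tp]]). A small self-contained check with made-up labels:

from sklearn import metrics

y_true = [1, 1, 1, 0, 0, 0, 0, 1]
y_pred = [1, 1, 0, 0, 0, 1, 0, 1]
cm = metrics.confusion_matrix(y_true, y_pred)
tn, fp, fn, tp = cm.ravel()
recall = tp / float(tp + fn)            # share of true positives that were kept
filtered = (tn + fn) / float(cm.sum())  # share of rows predicted negative
print(recall, filtered)                 # recall=0.75, filtered=0.5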
Example no. 4
def train(addr_train, clf, ratio, add_estimators):
    X_train, y_train = gd.get(addr_day=addr_train, ratio=ratio, features_to_get=__FEATURES_TO_GET)

    print "Fitting Model......"
    clf.n_estimators += add_estimators
    clf.fit(X_train, y_train)
    print "Done"

    if __SAVE_MODEL:
        model_name = "RF_" + onoff_line + "_" + str(ratio) + "_Model.p"
        dir_out = os.path.join(addr_train, "Random_Forest_Models")
        if not os.path.isdir(dir_out):
            os.mkdir(dir_out)
        path_out = os.path.join(dir_out, model_name)
        with open(path_out, "wb") as file_out:
            pickle.dump(clf, file_out)

    return clf
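
A model saved this way can be restored later with pickle.load. A short sketch; the path below is only illustrative (it follows the naming pattern above, but the actual ratio and on/off-line tag depend on the run), and binary mode ("rb"/"wb") is the safe choice for pickle files across platforms.

import pickle

model_path = "Random_Forest_Models/RF_online_0.5_Model.p"  # hypothetical path
with open(model_path, "rb") as file_in:
    clf = pickle.load(file_in)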
Example no. 5
def train():
    print "\n========== Start Training =========="
    list_io_addr = get_io_addr(__TRAIN_DATA)
    clf = BernoulliNB(class_prior=[0.1, 0.9], alpha=0.5)

    for addr_in in list_io_addr:
        print "\nGenerating training set from {}".format(addr_in)
        X_train, y_train = gd.get(addr_in, __RATIO)
        print "Done"

        print "Fitting Model......"
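        # partial_fit updates the same estimator incrementally, so every
        # day's data contributes to the final classifier.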
        clf.partial_fit(X_train, y_train, classes=[0, 1])
        print "Done"

    if __SAVE_MODEL:
        with open(__ROOT_MODEL, "wb") as file_out:
            pickle.dump(clf, file_out)
        return None
    else:
        return clf
Example no. 6
def train():
    clf = RandomForestClassifier(n_estimators=40,
                                 max_features="sqrt",
                                 oob_score=True,
                                 warm_start=False,
                                 n_jobs=-1,
                                 random_state=1514)
    list_io_addr = get_io_addr(__TRAIN_DATA)

    for path_in in list_io_addr:
        print "\n>>>>> Start Training on {}".format(path_in)
        X_train, y_train = gd.get(addr_day=path_in, ratio=__RATIO, features_to_get=__FEATURES_TO_GET)

        print "Fitting Model......"
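        # With warm_start=False, each clf.fit() call retrains the forest from
        # scratch, so only the data from the last path_in shapes the final model.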
        clf.fit(X_train, y_train)
        print "Done"

    if __SAVE_MODEL:
        with open(__ROOT_MODEL, "wb") as file_out:
            pickle.dump(clf, file_out)

    return clf
Example no. 7
#!/usr/bin/python

import Get_Data as Data
import numpy as np

xData = []

dataFile = ""
dataFile = raw_input(
    'Enter the name of the datafile to find the standard deviation (Blank for \'data.txt\'): \n'
)

if (dataFile == ""):
    dataFile = "data.txt"

Data.getData(dataFile, xData)

tempAvg = 0.0
count = 0
for n in xData:
    tempAvg += n
    count += 1
avg = tempAvg / count

tempSum = 0.0
count = 0
for n in xData:
    tempSum += pow(avg - n, 2)
    count += 1
stdDev = pow(tempSum / (count - 1), 0.5)
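
The same statistics can be computed with the numpy import that is already present; ddof=1 reproduces the (count - 1) sample-variance denominator used above.

avg_np = np.mean(xData)
stdDev_np = np.std(xData, ddof=1)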
Example no. 8
def CreateDirectory(path):
    if not os.path.exists(path):
        os.mkdir(path)


def Directory():
    CreateDirectory(dataset_path + 'CT_Images/')
    CreateDirectory(dataset_path + 'CT_Images/' + 'Train')
    CreateDirectory(dataset_path + 'CT_Images/' + 'Test')


if __name__ == "__main__":
    #Create some directory
    Directory()

    ImagePaths = ["Chest_Xray/", "Covid_Xray/"]
    md = Data.MakeDataset(ImagePaths)
    md.TrainData()
    md.TestData()

    train_generator = ig.TrainGenerator()
    val_generator = ig.ValGenerator()

    VggModel = BuildModel()
    VggModel.GetModel(input_shape=(224, 224, 3),
                      no_of_class=3,
                      learning_rate=0.001)
    VggModel.TrainModel(train_generator,
                        val_generator,
                        epochs=50,
                        steps_per_epoch=39,
                        validation_steps=10)
Example no. 9
def test(self, addr_in):
    X, y = gd.get(addr_in)
    prediction = self.clf.predict(X)
    return y, prediction
Example no. 10
import os
import time
import Constant
import G_API
import Get_Data
import Parse_Data
import Post_Data


if __name__ == '__main__':
    # -----------------------------------------------------------------------------------------------------------------
    # Start to count execution time and number of shipments in the process.
    start = time.time()
    # -----------------------------------------------------------------------------------------------------------------
    # Create a dict object to save primary reference, file name, and weight information.
    weight_dict = dict()
    file_list = os.listdir(Constant.shearers_xml_path)
    for file in file_list:
        pri_ref, weight = Get_Data.get_info_xml(Constant.shearers_xml_path, file)
        weight_dict[pri_ref] = [weight, Constant.shearers_xml_path + file]
    # -----------------------------------------------------------------------------------------------------------------
    # Convert primary reference list to a string separated by comma.
    pri_ref_str = ','.join([key for key, item in weight_dict.items()])
    # -----------------------------------------------------------------------------------------------------------------
    # Collect report OID, login username and password from .txt files.
    Get_Data.read_login_credentials()
    # -----------------------------------------------------------------------------------------------------------------
    # Use username and password combinations to log into TMS and extract login token from HTML response.
    session_requests, csrf = Post_Data.login_tms()
    # -----------------------------------------------------------------------------------------------------------------
    # Request the first page of the report in HTML format. Extract the response in text format.
    shipment_report_html_script = \
        Get_Data.get_shipment_report_by_report_format(session_requests, csrf, pri_ref_str).text
    # -----------------------------------------------------------------------------------------------------------------
Example no. 11
@author: Admin
"""


from Get_Relation import *
from Get_Data import *
from Time_Tool import *
from PowLaw import *
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from Tools import *

Data = Get_Data('2006/4','2007/3')
Data.index = range(len(Data))
Relation = Get_Relation(Data,0,0)
Relation = Relation.dropna()

Save_DataFrame(Relation,'2012')

'''
def Get_People_Num(Relation):
    P = set(Relation.ID)|set(Relation.Post_ID)
    return len(P)
 
Active.append(Get_People_Num(Data))

#Relation.columns = [['Source','Target']]
#Save_DataFrame(Relation,'Relation_2008')
Example no. 12
@author: Admin
"""

from Get_Relation import *
from Get_Data import *
from Time_Tool import *
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

Start = input('Start Time (year/month):    ')
End = input('End Time (year/month):    ')

Data = Get_Data(Start, End)
ALL_Relation = Get_Relation(Data, 0, 1)

# Remove Duplicate
ALL_Relation_RD = ALL_Relation.drop_duplicates(['P', 'R'])

# Remove Reply Self
Judge = [i != j for i, j in zip(ALL_Relation_RD.P, ALL_Relation_RD.R)]
Relation = ALL_Relation_RD[Judge]
Relation.index = range(len(Relation))
GTime = [Generailze_Time(time) for time in Relation.Time]
Relation['GTime'] = GTime
SRelation = Relation.sort('GTime')
SRelation.index = range(len(SRelation))

IDs = list(set(SRelation.P) | set(SRelation.R))
Example no. 13
import math
from textblob import TextBlob as tb
from scipy import spatial
import Get_Data as twitterdata

docs = twitterdata.get_data()
for i in range(len(docs)):
    docs[i] = tb(docs[i])

n_of_docs = len(docs)


def tf(word, blob):
    # float() keeps the ratio fractional under Python 2 as well
    return blob.words.count(word) / float(len(blob.words))


def n_containing(word, bloblist):
    return sum(1 for blob in bloblist if word in blob.words)


def idf(word, bloblist):
    return math.log(len(bloblist) / (1.0 + n_containing(word, bloblist)))


def tfidf(word, blob, bloblist):
    return tf(word, blob) * idf(word, bloblist)


bloblist = [i for i in docs]

# for i, blob in enumerate(bloblist):
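
The fragment stops before showing how the helpers are applied. One possible usage, scoring the words of a single document against the whole collection with the tf/idf helpers defined above (illustrative only):

for blob in bloblist[:1]:
    scores = {word: tfidf(word, blob, bloblist) for word in blob.words}
    top_terms = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:5]
    print(top_terms)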
Example no. 14
def train(self, addr_in, sampling_ratio, sampling_mode):
    X, y = gd.get(addr_in, ratio=sampling_ratio, mode=sampling_mode)
    self.clf.n_estimators += self.add_estimators
    self.clf.fit(X, y)
Example no. 15
@author: Admin
"""

from Get_Relation import *
from Get_Data import *
from Time_Tool import *
from PowLaw import *
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

import networkx as nx

Data = Get_Data('2006/4', '2006/5')

Relation = Get_Relation(Data, 0, 1)

GTime = [Generailze_Time(time) for time in Relation.Time]

Relation['GTime'] = GTime

Sort_Relation = Relation.sort('GTime')

R = Sort_Relation[['P', 'R']]

#Number = input('N: ')
rl = []
for Number in range(3, 3000):
Example no. 16
import time
import json
import Constant
import Get_Data
import Post_Data
import numpy.random.common
import numpy.random.bounded_integers
import numpy.random.entropy


if __name__ == '__main__':

    # START COUNTING RUNNING TIME
    start = time.time()

    count = 0

    # LOGIN TMS
    Get_Data.read_login_credentials()
    Get_Data.read_report_oid()
    session_requests, csrf = Post_Data.login_tms()

    # Concatenate for route board report url.
    url_report_routeboard = Constant.url_report_routeboard
    url_report_routeboard = url_report_routeboard.replace('OID_TO_REPLACE', str(Constant.oid_report_so))

    # Request route board report.
    print('Reading data from route board...')
    response = session_requests.get(url_report_routeboard).text
    # Convert json_response.
    route_board_json_responses = json.loads(response)
    print('Route board data read successfully.')

    # Dictionaries for customer loads and execution loads.
Example no. 17
import Constant
import Post_Data
import Get_Data
import Config_Post_Data
import time
import G_API
import Input_Collection as Input

if __name__ == '__main__':

    # READ USER LOGIN CREDENTIALS FROM TEXT FILE.
    Get_Data.read_login_credentials()

    # GET INPUT FROM USER.
    input_load_number = 'load number'
    input_pickup_appointment_date = 'pickup appointment date'
    input_pickup_appointment_time = 'pickup appointment time'

    load_numbers = Input.get_general_input(input_load_number)
    pickup_appointment_date = Input.get_date(input_pickup_appointment_date)
    pickup_appointment_time = Input.get_time(input_pickup_appointment_time)

    # START COUNTING RUNNING TIME
    start = time.time()

    print('Pickup appointment', pickup_appointment_date,
          pickup_appointment_time, 'will be added to load(s):', load_numbers,
          '.')

    # LOGIN TMS
    session_requests, csrf = Post_Data.login_tms()
Example no. 18
import Constant
import Get_Data
import G_API
import Post_Data
import Config_Post_Data
import time

if __name__ == '__main__':

    #START COUNTING RUNNING TIME
    start = time.time()

    #LOGIN TMS
    Get_Data.read_login_credentials()
    session_requests, csrf = Post_Data.login_tms()

    #READ GOOGLE SHEETS VALUE

    print('Loading Google Sheet...')

    workbook_cr = G_API.get_workbook_by_id(Constant.g_sheets_workbook_id_ops)
    worksheet_cr = G_API.get_worksheet_by_id(
        workbook_cr, Constant.g_sheets_worksheet_id_ops)
    sheet_values = G_API.get_values_in_list(worksheet_cr)
    sheet_values = G_API.convert_values_to_dict(
        sheet_values)  #Convert value list to dictionary.
    for key, value_list in sheet_values.items():
        sheet_values[key] = ','.join(value_list)
    assert sheet_values is not None, 'Error: Google Sheet is empty or reading failure.'
    print('Google Sheet loading completed.')
Example no. 19
import Get_Data as twitterdata
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy import spatial

#corpus=["all is well","best of luck","best of luck","best of all"]
corpus = twitterdata.get_data()
n_of_articles = len(corpus)
vectorizer = TfidfVectorizer(min_df=1, norm='l1', smooth_idf=False)

X = vectorizer.fit_transform(corpus)
# print (dict(zip(vectorizer.get_feature_names(), idf)))

feature_names = vectorizer.get_feature_names()
scores_relative_to_comparing = []
ss = []
doc = 1
feature_index = X[doc, :].nonzero()[1]

writer = open('idf.txt', 'w', encoding="utf8")
for i in range(len(vectorizer.idf_)):
    writer.write(str(vectorizer.idf_[i]))
    writer.write(" ")
    writer.write(str(feature_names[i]))
    writer.write("\n")
writer.close()

c = twitterdata.files_in_dir
cc = 0
# for i in range(vectorizer.idf_.__len__()):
#     print(feature_names[i])
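
scipy.spatial is imported above but never used in this fragment; one possible use is the cosine similarity between two TF-IDF rows (dense conversion is acceptable for a small corpus). Illustrative only:

if n_of_articles >= 2:
    v0 = X[0].toarray().ravel()
    v1 = X[1].toarray().ravel()
    similarity = 1 - spatial.distance.cosine(v0, v1)
    print(similarity)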
Example no. 20
import Post_Data
import Constant
import os
import G_API
import time
import Get_Data

if __name__ == '__main__':

    # -----------------------------------------------------------------------------------------------------------------
    # Start to count execution time and number of shipments in the process.
    start = time.time()
    # -----------------------------------------------------------------------------------------------------------------
    # Remove Amplify 204 message. Remove unused files.
    Get_Data.remove_unused_files()
    print('Unused files removal completes.')
    # -----------------------------------------------------------------------------------------------------------------
    # Create a dict object to save primary reference, file name, and weight information.
    weight_dict = dict()
    file_list = os.listdir(Constant.amplify_204_path)
    for file in file_list:
        pri_ref, weight = Get_Data.get_info_204(Constant.amplify_204_path,
                                                file)
        weight_dict[pri_ref] = [weight, Constant.amplify_204_path + file]
    # -----------------------------------------------------------------------------------------------------------------
    # Convert primary reference list to a string separated by comma.
    pri_ref_str = ','.join([key for key, item in weight_dict.items()])
    # -----------------------------------------------------------------------------------------------------------------
    # Collect report OID, login username and password from .txt files.
    Get_Data.read_login_credentials()
    # -----------------------------------------------------------------------------------------------------------------
Example no. 21
import pandas as pd
import Post_Data
import Constant
import time
import G_API
import Get_Data

if __name__ == '__main__':

    # START COUNTING RUNNING TIME
    start = time.time()

    # LOGIN TMS
    Get_Data.read_login_credentials()
    session_requests, csrf = Post_Data.login_tms()

    # Extract data from the spreadsheet.
    df_shipment = pd.read_excel('Dynamic.xlsx', dtype=str)
    oids = df_shipment['OID'].tolist()

    for oid in oids:

        # Concatenate the load page.
        url_load_html = Constant.url_prefix_load_html
        url_load_html = url_load_html.replace('TO_BE_REPLACE', oid)

        # Concatenate the reference page.
        url_ref_html = Constant.url_prefix_ref_html
        url_ref_html = url_ref_html.replace('TO_BE_REPLACE', oid)

        # Parse the html script of the load page and find the load attached.