Example no. 1
def load_train_test_data():
    rating_train_data = os.path.join(DATA_DIR, 'ua.base')
    rating_test_data = os.path.join(DATA_DIR, 'ua.test')
    ratings_base, ratings_test = ProcessData.process_data(None, None, None, 'train_test_data',
                                                          rating_train_data, rating_test_data)
    # ProcessData.check_loaded_data(users, ratings, items, ratings_base, ratings_test)
    train_data, test_data = ProcessData.create_train_test_SFrame(ratings_base, ratings_test)
    return train_data, test_data
Example no. 2
def run():
    if config.runMode.find('train') >= 0:
        trainFeature = Feature(config.trainFile, 'train')
        testFeature = Feature(config.testFile, 'test')
    else:
        testFeature = Feature(config.readFile, 'test')

    if config.formatConvert:
        df = dataFormat()
        df.convert()

    config.globalCheck()

    # TODO
    # directoryCheck()

    config.swLog = open(config.outDir + config.fLog, 'w')
    config.swResRaw = open(config.outDir + config.fResRaw, 'w')
    config.swTune = open(config.outDir + config.fTune, 'w')

    if config.runMode.find('tune') >= 0:
        print('\nstart tune...')
        config.swLog.write('\nstart tune...\n')
        tuneStochasticOptimizer()
    elif config.runMode.find('train') >= 0:
        print('\nstart training...')
        config.swLog.write('\nstart training...\n')
        if config.runMode.find('rich') >= 0:
            richEdge.train()
        else:
            train()
    elif config.runMode.find('test') >= 0:
        config.swLog.write('\nstart testing...\n')
        if config.runMode.find('rich') >= 0:
            richEdge.test()
        else:
            test()
        ProcessData.tocrfoutput(config.outFolder + 'outputTag.txt',
                                config.outputFile,
                                config.tempFile + '/test.raw.txt')
    elif config.runMode.find('cv') >= 0:
        print('\nstart cross validation')
        config.swLog.write('\nstart cross validation\n')
        crossValidation()
    else:
        raise Exception('error')

    config.swLog.close()
    config.swResRaw.close()
    config.swTune.close()

    if config.runMode.find('train') >= 0:
        resSummarize.summarize()

    #ClearDirectory(config.tempFile)

    print('finished.')
Example no. 3
def preprocess(datapath, outputpath, dsfactor=16):
    filelist2d = glob.glob(datapath + "2d/*")

    for filename in filelist2d:
        fn3d = datapath + "3d/" + os.path.basename(filename).split(
            '.2d.h5')[0] + ".3d.h5"
        data, dataf = pd.loaddata(filename, key="features")
        data3d = pd.loaddata(fn3d, is3D=True)
        dsdata, samples = pd.downsample(data, dsfactor, *data.shape)
        pd.savedata("2d", dataf, dsdata, "features", outputpath,
                    os.path.basename(filename))
Example no. 4
    def __init__(self):

        self.dataset = ProcessData(train_ratio=0.8,process_raw_data=False,do_augment=False, image_type='OA',
                                   get_scale_center=False, single_sample=True,
                                   do_blur=False, do_crop=False, do_deform=False, do_flip=True)
        self.model = cnn_skipC_OA_model.cnn_skipC_OA_model(
            criterion=nn.MSELoss(),
            optimizer= torch.optim.Adam,
            learning_rate=0.001,
            weight_decay=0
        )
Example no. 5
 def apiAdd(clientID, sourceID, data, schemaOverRide):
     if isinstance(data, dict):
         dataList = [data]
     else:
         dataList = data
     toInsert = []
     for item in dataList:
         item = ProcessData.flatten(data=item)
         item = ProcessData.enrich(data=item, schemaOverRide=schemaOverRide)
         date = ProcessData.getNow()
         toInsert.append({'created': date, 'data': item})
     SourceHandler.add(clientID=clientID, sourceID=sourceID, data=toInsert)
Example no. 6
def CreateAccSummaryString(OutFile):
    df1 = ProcessData.loadBehDataFile(OutFile)
    # Process the file
    df1 = ProcessData.processBehavioralFile(df1)
    # Extract accuracy
    ACCdata = df1.data.Acc.groupby(df1.data.LoadLevels).mean()
    # Create a string of accuracy at each load level
    SumString = ""
    count = 1
    for i in ACCdata:
        SumString = "%s,%d:%2.0f"%(SumString,count,i*100)
        count = count + 1
    return SumString[1:]
Example no. 7
def CreateAccSummaryString(OutFile):
    df1 = ProcessData.loadBehDataFile(OutFile)
    # Process the file
    df1 = ProcessData.processBehavioralFile(df1)
    # Extract accuracy
    ACCdata = df1.data.Acc.groupby(df1.data.LoadLevels).mean()
    # Create a string of accuracy at each load level
    SumString = ""
    count = 1
    for i in ACCdata:
        SumString = "%s,%d:%2.0f" % (SumString, count, i * 100)
        count = count + 1
    return SumString[1:]
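For reference, a quick trace of the formatting above, assuming hypothetical mean accuracies of 0.85 and 0.90 at two load levels:
# Hypothetical trace of CreateAccSummaryString (accuracies 0.85 and 0.90 assumed):
#   after load level 1: SumString == ",1:85"
#   after load level 2: SumString == ",1:85,2:90"
#   returned value (leading comma stripped): "1:85,2:90"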
Example no. 8
def main(portfolio):

    pricedatadf = portfolio.pricedatadf
    fixedday = pd.to_datetime(portfolio.inputdate_init).day

    # Init weights DataFrame and datetime index (weightsdfindex)
    column_names = ['Weight' + x  for x in portfolio.tickersymbolist]
    weightsdf = pd.DataFrame(columns=column_names)
    weightsdfindex = []

    # first allocation day
    nextallocday = portfolio.pricedatadf[portfolio.inputdate_init:].index[0]
    i = 0
    while nextallocday != 'Out':
        print('++++++++++++++++++++++++++++++++++++++++')


        # get DataFrame with price returns
        # All Days full Year
        #returnsdf = ProcessData.getdailyreturns_year(pricedatadf, nextallocday, fixedday)
        # All Days full Month
        #returnsdf = ProcessData.getdailyreturns_month(pricedatadf, nextallocday, fixedday)
        # One Day per Month During one Year (example: first day of each month)
        returnsdf = ProcessData.getslicedreturns_month(pricedatadf, nextallocday, fixedday)

        # get next allocation day
        nextallocday = ProcessData.nextallocationday_month(pricedatadf, nextallocday, fixedday)

        # get covariance matrix and expected returns
        covariance, expectedreturns = getcovexpect(returnsdf)
        # minimize
        weights = minimizefunction(covariance, expectedreturns)

        # get Standard Deviation
        std = getstd(weights, covariance)

        # save date index in weights DataFrame
        weightsdfindex.append(returnsdf.index[-1])
        weightsdf.loc[i] = weights
        i += 1

        print('Expected returns:\t' + str(expectedreturns))
        print('Weights:         \t' + str(weights*100))
        print('Standard Deviation:\t' + str(std))
        print('Next Alloc Day:  \t' + str(nextallocday))
        print()

    # get index and save weightsdf to csv
    weightsdf = getweightsdf(weightsdf, weightsdfindex)

    return weightsdf
Example no. 9
def ProcessAFile(inputFile,subid,Tag,col):
    print inputFile
    print subid
    df1 = ProcessData.loadBehDataFile(inputFile)
    df1 = ProcessData.processBehavioralFile(df1)    
    wkB = WriteToGoogleSpreadSheet.openWorkBook()    
    # Check to see if a worksheet exists
    worksheetName = 'behData_%s'%(subid)
    try:
        wkS = wkB.worksheet(worksheetName)
    except:
        wkS = WriteToGoogleSpreadSheet.createWorksheet(wkB, subid)

    ProcessData.writeSummaryBehavioralDataToFile(df1, wkS, Tag, col)    
    return df1
Example no. 10
def builder(directed,weighted,window_size,inputDoc):
    # inputDoc: String
    # directed: True for directed, False for undirected
    # weighted: True for weighted graph, False for unweighted graph
    inputDoc = ProcessData.ProcessDoc(inputDoc)
    # print inputDoc
    # for direction
    if directed:
        G = nx.DiGraph()
    else:
        G = nx.Graph()

    # add edges and nodes
    for s in inputDoc:
        # Ignore the sentence with less than window_size words
        if len(s) >= window_size:
            for word in s:
                # print word
                G.add_node(word,name = word)
            window = list(s[:window_size])
            for i in range(len(s)):
                # print window
                for word in window[1:]:
                    if word != window[0]:
                        if G.has_edge(window[0],word):
                            G[window[0]][word]['weight'] += 1
                        else:
                            G.add_edge(window[0],word,weight=1)
                        # print window[0],word
                if i < (len(s) - window_size):
                    window.append(s[i+window_size])
                del window[0]
    return G
# graph = builder(directed,weighted,window_size,testDoc)
# nx.write_gml(graph,'test1.gml')
Example no. 11
def auth(request, email, password, action):
    action = action[0]
    password = password[0]
    email = email[0]
    redirectURL = '/?'
    validityCheck = action in [
        'login', 'register'
    ] and ProcessData.typeCheck(typeName='email', value=email) is True
    if validityCheck is True:
        if action == 'login':
            try:
                client = Client.get(email=email, password=password)
                client.login(request=request)
            except:
                redirectURL += 'error=The entered email and password do not match'
        elif action == 'register':
            try:
                client = Client.create(email=email, password=password)
                client.login(request=request)
            except DuplicateUser:
                redirectURL += 'error=This email address already exists in our records'
    else:
        redirectURL += 'error=Enter a valid email address'
    # return HttpResponse("WORKED")
    return redirect(redirectURL)
Example no. 12
    def do_POST(self):
        form = cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={'REQUEST_METHOD':'POST',
                     'CONTENT_TYPE':self.headers['Content-Type'],
                     })
        id = uuid.uuid4()
        logfilepath = 'Data/' + str(id) + '.log'
        with open(logfilepath, 'w') as logfile:
            logfile.write(form['datafile'].value)

        directory = 'SharedData/' + str(id)
        if not os.path.exists(directory):
            os.makedirs(directory)

        jsonobj = ProcessData.fromFile(logfilepath)
        jsonpath = directory + '/tasks.json'
        with open(jsonpath, 'w') as jsonfile:
            jsonfile.write(json.dumps(jsonobj)+"\n")
        os.remove(logfilepath)
        print 
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        # This is hacky and horrible and I'll look into a better way.
        self.wfile.write(
        """
<!DOCTYPE html><html lang="en"><html><head><meta charset="UTF-8"><title>LegionViz</title><link rel="stylesheet" href="webstyle.css" type="text/css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.6/d3.min.js" charset="utf-8"></script>
</head><body><div id="header"><div><a href="index.html" class="logo"><h1>LegionVis</h1></a></div>
<ul><li><a href="index.html">home</a></li><li class="selected"><a href="tool.html">tool</a></li><li><a href="contact.html">contact</a></li></ul></div>
<div id="body"><div class="header"><p><a href='http://"""+form.headers.get('host')+"/Shared/"+str(id)+"""/display.html'>Click here to access your visualization</a></p></div></div></body></html>        
        """)
Example no. 13
 def Boot(self, FNAME_process):
     FP_process = open(FNAME_process, 'r')
     for line in FP_process:
         current_process = line.split(',')
         process = ProcessData.Process(self.current_PID, current_process)
         self.QueueProcess(self.input_queue, process)
         self.current_PID += 1
     FP_process.close()
Example no. 14
def train():
    user_data_file = os.path.join(DATA_DIR, 'u.user')
    rating_data_file = os.path.join(DATA_DIR, 'u.data')
    item_rating_data = os.path.join(DATA_DIR, 'u.item')
    users, ratings, items = ProcessData.process_data(user_data_file, rating_data_file, item_rating_data,
                                                     type='metadata')
    train_data, test_data = load_train_test_data()
    popularity_model, item_sim_model = Recommender.train_models(train_data, users, items)
    return popularity_model, item_sim_model, users, items
Example no. 15
def Main():

    config = Configuration('configuration.json')

    if config.isProcessingDataFirst:
        ProcessData.process_data_files()

    if config.isGeneratingGraph:
        GraphData.create_plots()

    if config.isGeneratingClocklabFiles:
        FormatDataForClockLab.create_clock_lab_formatted_bulked_out_with_zeros_text_file(
        )

    if config.isGeneratingChronosFitFile:
        FormatDataForChronosFit.create_cronos_fit_formatted_file()

    if config.isShowingIndividualFish and config.isGeneratingDistanceSums:
        DataFormatter.generate_distance_sums_for_individual_fish()
Example no. 16
def generate(reqId):

    def getRequestParameters():
        query = ("SELECT NODE_ID, SOURCE_ID FROM training_requests WHERE REQUEST_ID = %s")
        cursor.execute(query,(reqId,))
        out = cursor.fetchall()[0]
        return out[0], out[1]

    nodeId, srcId = getRequestParameters()



    input, output,dates = process.getTrainingData(reqId)


    numInputs = len(input[0])
    numOutputs = len(output[0])
    file = os.path.join(dir, "./inputs/input_{}".format(reqId))


    def writeHeader():
        with open(file, 'w') as f:
            f.write("_H:\t$Name")

            for i in range(numInputs):
                if (i == 0):
                    f.write(inpFormat.format(i) + firstFormat.format(numInputs))
                else:
                    f.write(inpFormat.format(i))
            for i in range(numOutputs):
                if (i == 0):
                    f.write(outpFormat.format(i) + firstFormat.format(numOutputs))
                else:
                    f.write(outpFormat.format(i))

    def writeBody():
        with open(file, 'a') as f:
            for i in range(len(input)):
                f.write("\n")
                f.write("_D:\t")
                f.write("{}\t".format(dates[i]))
                for j in input[i]:
                    f.write(str(j)+"\t")
                for j in output[i]:
                    f.write(str(j))



    def addToFile():
        writeHeader()
        writeBody()



    addToFile()
Example no. 17
def anomaly_detection(path):
    """
    Anomaly detection process.

    :return:
    """
    origin_data, sample_data, sample_abnormal_index, sample_normal_index, abnormal \
        = data.preprocess(path, sample_bound=SAMPLE_BOUND, abnormal_bound=ABNORMAL_BOUND, normal_bound=NORMAL_BOUND, abnormal_rate=ABNORMAL_RATE)
    sample_num = sample_data.shape[0]

    centers, result = graph.classify_abnormal_data(abnormal, K_ABNORMAL)
    Y = init_detection_tag(sample_num, sample_abnormal_index,
                           sample_normal_index, result)

    if USE_LEARNING:
        U, Du, De, H, W, first_abnormal, first_normal = init_hyper_graph(
            sample_data, abnormal, centers)
    else:
        U, first_abnormal, first_normal = init_hyper_graph_quick(
            sample_data, abnormal, centers)

    true_tag = origin_data[:, 0]
    true_data_index = np.nonzero(true_tag > 0)[0].tolist()
    false_data_index = np.nonzero(true_tag <= 0)[0].tolist()
    tp = set(first_abnormal).intersection(set(true_data_index))
    tn = set(first_normal).intersection(set(false_data_index))

    fp = set(first_abnormal).intersection(set(false_data_index))
    if len(fp) > 0:
        fp_array = origin_data[list(fp), :]
        fp_array = fp_array[fp_array[:, 1].argsort()]
        np.savetxt("fp_data.txt", fp_array, delimiter=",")

    accuracy = (len(tp) + len(tn)) * 1.0 / true_tag.shape[0]
    abnormal_precision = len(tp) * 1.0 / len(first_abnormal)
    abnormal_recall = len(tp) * 1.0 / len(true_data_index)
    f_value = 2.0 * abnormal_precision * abnormal_recall / (
        abnormal_precision + abnormal_recall)
    print 'Accuracy:' + str(accuracy)
    print 'First Abnormal precision:' + str(abnormal_precision)
    print 'First Abnormal recall:' + str(abnormal_recall)
    print 'First F value:' + str(f_value)

    if USE_LEARNING:
        F = learn.joint_learning(LAMBDA,
                                 LEARNING_RATE,
                                 U,
                                 Y,
                                 Du,
                                 De,
                                 H,
                                 W,
                                 MU,
                                 joint=USE_JOINT_LEARNING)
        measure_result(F, origin_data)
Example no. 18
def CreateAccSummaryString(OutFile):
    df1 = ProcessData.loadBehDataFile(OutFile)
    # Process the file
    df1 = ProcessData.processBehavioralFile(df1)
    # Extract accuracy
    ACCdata = df1.data.Acc.groupby(df1.data.LoadLevels).mean()
    # Create a string of accuracy at each load level
    SumString = ""
    count = 1
    if 'TrainFB' in df1.RunType:
        # If there is feedback then the scoring doesn't work because there are
        # extra rows in the data file.
        DataScale = 2.6666666
    else:
        DataScale = 1

    for i in ACCdata:
        SumString = "%s,%d:%2.0f" % (SumString, count, i * 100 * DataScale)
        count = count + 1
    return SumString[1:]
Example no. 19
 def GenerateModel(dictionary, id):
     demand = ProcessData.ProcessData(dictionary)
     Prediction.GenerateModel(dictionary, demand)
     latestPrediction = Prediction.GetPrediction(dictionary, demand)
     loadDict[id] = latestPrediction
     with open("savedPredictions.txt", "a+") as outfile:
         json.dump({"id": id, "predictions": latestPrediction}, outfile)
         outfile.write("\n")
     print("latestPrediction type: ", type(latestPrediction),
           "\n latestPrediction: ", latestPrediction)
     return "Model Generation completed."
Example no. 20
def LoadData(filename):

    xml = filename + ".xml"
    trace = "Data\\" + filename + "\\"
    title, labels, jconf, authors, FullName, organization = ProcessData.ProcessingRawData(
        trace + xml)
    title = ProcessData.Wipe_off_Punctuation(title)
    title_vocab, title_split = ProcessData.Split_Title(title)
    title_one_hot, Max_Sequence_Len, vocab_size = ProcessData.One_hot_encoding(
        title_vocab, title_split)
    title_one_hot_padding = ProcessData.Padding_One_hot(
        title_one_hot, Max_Sequence_Len)
    author_vocab, authors_split = ProcessData.Split_Authors(authors)

    edge_type = np.load(trace + "edge_type.npy")
    edge_list_src = np.load(trace + "edge_list_src.npy")
    edge_list_dst = np.load(trace + "edge_list_dst.npy")

    num_nodes = len(authors_split)
    edge_norm = [1 for i in range(len(edge_type))]

    print("Number of edges: ", len(edge_list_src))
    print("Number of nodes: ", len(authors_split))
    print("Number of class: ", max(labels) + 1)

    train_idx = random.sample(range(len(authors_split)),
                              int(len(authors_split) * 0.8))
    test_idx = []
    for i in range(len(authors_split)):
        if i not in train_idx:
            test_idx.append(i)

    inputs = title_one_hot_padding
    labels = labels
    return edge_type, edge_list_src, edge_list_dst, num_nodes, edge_norm, vocab_size, train_idx, test_idx, inputs, labels
Example no. 21
def main():
    # Process data
    filename = "Data/pima-indians-diabetes.data.csv"
    dataset = ProcessData.loadCsv(filename)
    # print("Loaded data file {0} with {1} rows".format(filename,len(dataset)))

    splitRatio = 0.67
    train, test = ProcessData.splitDataset(dataset, splitRatio)
    # print("Split {0} rows into train with {1} and test with {2}".format(len(dataset),train,test))

    #Get data feature
    separated = GetFeature.separateByClass(train)
    # print("Separated instances: {0}".format(separated))

    summaries = GetFeature.summarizeByClass(train)
    # print("Summaries[0]: {0}".format(summaries[0]) +"\n"
    #       + "Summaries[1] {0} ".format(summaries[1]))

    # Prediction
    predictions = Prediction.getPredictions(summaries, test)
    accuracy = Prediction.getAccuracy(test, predictions)
    print("Accuracy:{0}%".format(accuracy))
Example no. 22
def main():
    args = len(sys.argv)

    input_file = sys.argv[1]
    src = sys.argv[2]
    dest = sys.argv[3]

    with open(input_file, 'r') as file:
        data = file.read().split('\n')

    prepared_data = P.prepareData(data)

    if args == 4:
        uniform_cost_search(prepared_data, src, dest)
    elif args == 5:
        heuristic_file = sys.argv[4]
        with open(heuristic_file, 'r') as file:
            data = file.read().split('\n')
        heuristic_data = P.prepareHeuristicData(data)
        a_star_search(prepared_data, src, dest, heuristic_data)
    else:
        print('Invalid number of arguments...')
        sys.exit()
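Given the sys.argv handling above, the script takes an input file, a source node, a destination node, and optionally a heuristic file; a hypothetical invocation (script and file names assumed):
# Hypothetical command lines (len(sys.argv) is 4 or 5, including the script name):
#   python search.py graph.txt A B                  # runs uniform_cost_search
#   python search.py graph.txt A B heuristic.txt    # runs a_star_search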
Example no. 23
    def dist_plot(self, file, column):
        data = ProcessData.importData(file)
        print('hi')
        data['levels'] = (data[column] - data[column].min()) / (
            data[column].max() - data[column].min())
        sns.despine(fig=self.fig,
                    ax=self.axes,
                    top=True,
                    right=True,
                    left=True,
                    bottom=True)
        plot = sns.distplot(data['levels'], ax=self.axes)

        plot.set_yticks([])
Example no. 24
def train():
    fileName = "trainData.model"
    test = ReadFile.QAData("training.data")
    test.readFile()

    ProD.wordSeg(test)
    ProD.delHighFre_useless(test)
    ProD.delHighFre_psg(test)
    trainModel(test, fileName)
Example no. 25
def getData():
    if request.method == 'POST':
        #extract parameters from request.
        year = request.form['year']
        month = request.form['month']
        type = request.form['type']

        #construct a sql date type.
        date = "{0}-{1}-01".format(year, month)

        # todo add a method to build a geoJson.
        snowGeoJson = None
        if (type == 'polygon'):
            snowGeoJson = ProcessData.recoverData(date, type)
            snowGeoJson = json.dumps(snowGeoJson)

        # FOR TESTING SPATIOTEMPORAL SIMULATION FOR ECITY ONLY.
        #-----------------------------
        elif (type == 'ecity'):
            snowGeoJson = ProcessData.recoverData(date, type)
            snowGeoJson = json.dumps(snowGeoJson)
        #------------------------------

        elif (type == 'point'):
            objList = ProcessData.recoverData(date, type)
            snowGeoJson = ProcessData.fromListToJson(objList)
        # Setting a response with json and header.
        response = Response(snowGeoJson, mimetype='application/json')
        # Adding ACAO to response header for security purpose, so it can match the header of client.
        response.headers.add('Access-Control-Allow-Origin', '*')

        # print(response.get_data())
        return response
    # the code below is executed if the request method
    # was GET or the credentials were invalid
    return 'failed'
Example no. 26
def hhdetection(stop_event):
    while not stop_event.is_set():
        stop_event.wait(5)
        num_distance = 0
        array_i = []
        array_j = []
        distance = []
        sum_distance = 0
        max_avg = 0
        sum_avg = 0
        print()
        print()
        print("=====================================================")
        print("NOW HH DETECTION!!!!")

        for i in range(int(len(gportindex))):
            if sum(gporttraffic[i]) / len(gporttraffic[i]) > max_avg:
                max_avg = sum(gporttraffic[i]) / len(gporttraffic[i])
            sum_avg += sum(gporttraffic[i]) / len(gporttraffic[i])
        avg_avg = sum_avg / len(gportindex)

        for i in range(len(gportindex)):
            if sum(gporttraffic[i]) / len(gporttraffic[i]) > avg_avg:
                for j in range(len(gportindex)):
                    if sum(gporttraffic[j]) / len(gporttraffic[j]) > avg_avg:
                        if gswitchid[i] != gswitchid[j]:
                            dis = ProcessData.euclidean_distance(
                                gporttraffic[i], gporttraffic[j])
                            array_i.append(i)
                            array_j.append(j)
                            distance.append(dis)
                            sum_distance += dis
                            num_distance += 1

        if num_distance != 0:
            average_distance = sum_distance / num_distance
            sort_dis = sorted(range(len(distance)), key=lambda k: distance[k])
            for i in range(len(sort_dis)):
                k = sort_dis[i]
                print("distance[k] is " + str(distance[k]))
                #if distance[k] > average_distance:
                print("Hevey Hitter is " + str(array_i[k]) + " " +
                      str(array_j[k]))
                print("Distance is " + str(distance[k]))
                print(gporttables[array_i[k]])
                print(gporttables[array_j[k]])
        else:
            print("NO HH DETECTED!!!")
Example no. 27
    def Build_TF_IDF(self):
        for i in range(self.Total_Mails):
            Msg = ProcessData.Process_Msg(self.mails[i])
            count = list()
            for word in Msg:
                if self.labels[i]:
                    self.TF_Spam[word] = self.TF_Spam.get(word, 0) + 1
                else:
                    self.TF_Ham[word] = self.TF_Ham.get(word, 0) + 1

                if word not in count:
                    count += [word]

            for word in count:
                if self.labels[i]:
                    self.IDF_Spam[word] = self.IDF_Spam.get(word, 0) + 1
                else:
                    self.IDF_Ham[word] = self.IDF_Ham.get(word, 0) + 1
Example no. 28
 def editConvertTypes(cls, clientID, sourceID, key, fromType, toType):
     source = cls.getSource(clientID=clientID, sourceID=sourceID)
     changes = []
     for doc in source.find({"data." + key: {"$type": fromType}}):
         value = doc.get('data', {}).get(key, None)
         if value is None: continue
         value = ProcessData.convertType(value=value, toType=toType)
         if value is None: continue
         doc['data'][key] = value
         changes.append(ReplaceOne({'_id': doc['_id']}, doc))
     if len(changes) > 0:
         source.bulk_write(changes, ordered=False)
     cls.cacheSchema(clientID=clientID,
                     sourceID=sourceID,
                     result=[{
                         'key': key,
                         'type': toType
                     }])
Example no. 29
 def dummy(clientID, sourceID):
     userIDs = [uuid.uuid4().hex for x in range(100)]
     types = ['Successful', 'Failed']
     eventTypes = ['purchase', 'refund']
     dates = pd.date_range(start=datetime.datetime(2014, 1, 1),
                           end=datetime.datetime.utcnow(),
                           freq='H',
                           tz='UTC')
     data = [{
         'amount':
         random.randint(10, 10000),
         "timestamp":
         (ProcessData.getDate(value=random.choice(dates).isoformat())),
         'userID':
         random.choice(userIDs),
         'event':
         random.choice(eventTypes),
         "status":
         random.choice(types),
     } for x in range(10000)]
     Add.apiAdd(clientID=clientID,
                sourceID=sourceID,
                data=data,
                schemaOverRide={})
Example no. 30
 def do_GET(self):
     print self.path
     STATICS = ('/Data','/Interface')
     if self.path.startswith('/Shared/'):
         # The beginnings of the link sharing utility.
         pathparts = self.path.split('/')
         id = pathparts[2]
         if pathparts[3] != 'tasks.json':
             pathwithoutshared = self.path[8:]
             index = pathwithoutshared.index('/')
             newpath = pathwithoutshared[index:]
             self.path = newpath
         else:
             self.send_response(200, "OKAY")
             self.end_headers()
             copyfileobj(open('SharedData/'+id+'/tasks.json', 'r'), self.wfile)
             return
     if self.path == "/" or self.path == '/index.html':
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/index.html', 'r'),self.wfile)
     if self.path == "/tool.html":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/tool.html', 'r'),self.wfile)
     if self.path == "/contact.html":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/contact.html', 'r'),self.wfile)
     elif self.path == "/upload.html":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/upload.html', 'r'),self.wfile)
     elif self.path == "/display.html":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/display.html', 'r'),self.wfile)
     elif self.path == "/js/main.js":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/js/main.js', 'r'),self.wfile)
     elif self.path == "/js/mainview.js":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/js/mainview.js', 'r'),self.wfile)
     elif self.path == "/js/histogramview.js":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/js/histogramview.js', 'r'),self.wfile)
     elif self.path == "/js/summaryview.js":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/js/summaryview.js', 'r'),self.wfile)
     elif self.path == "/js/graphview.js":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/js/graphview.js', 'r'),self.wfile)
     elif self.path == "/js/script.js":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/js/script.js', 'r'),self.wfile)
     elif self.path == "/style.css":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/style.css', 'r'),self.wfile)	
     elif self.path == "/webstyle.css":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/webstyle.css', 'r'),self.wfile)	
     elif self.path == "/favicon.ico":
         self.send_response(200, "OKAY")
         self.end_headers()
         copyfileobj(open('Interface/favicon.ico', 'r'),self.wfile)    
     elif self.path == "/tasks.json":
         self.send_response(200, "OKAY")
         self.end_headers()
         self.json_out(ProcessData.fromFile("Data/PROF.log"))    
     elif reduce(lambda a, b: a or b, (self.path.startswith(k) for k in STATICS)):
         SimpleHTTPRequestHandler.do_GET(self)
Example no. 31
def test(inputFile,subid,Tag,col):
    df1 = ProcessData.loadBehDataFile(inputFile)
    df1 = ProcessData.processBehavioralFile(df1)    
    return df1
Example no. 32
class CNN_skipCo_OA_trainer(object):
    def __init__(self):

        self.dataset = ProcessData(train_ratio=0.8,process_raw_data=False,do_augment=False, image_type='OA',
                                   get_scale_center=False, single_sample=True,
                                   do_blur=False, do_crop=False, do_deform=False, do_flip=True)
        self.model = cnn_skipC_OA_model.cnn_skipC_OA_model(
            criterion=nn.MSELoss(),
            optimizer= torch.optim.Adam,
            learning_rate=0.001,
            weight_decay=0
        )


        #self.logger = Logger(self)

    def fit(self):
        # get scale and center parameters
        scale_params_low, scale_params_high = utils.load_params(image_type=self.dataset.image_type,
                                                                param_type="scale_params")
        mean_image_low, mean_image_high = utils.load_params(image_type=self.dataset.image_type,
                                                            param_type="mean_images")

        epochs=1
        for e in range(0, epochs):
            # separate names into random batches and shuffle every epoch
            self.dataset.batch_names(batch_size=6)
            # in self.batch_number is the number of batches in the training set
            for i in range(self.dataset.batch_number):
                X, Y = self.dataset.create_train_batches(self.dataset.train_batch_chunks[i])
                # scale and center the batch
                scale_center_X = utils.scale_and_center(X, scale_params_low, mean_image_low,
                                                        image_type=self.dataset.image_type)
                scale_center_Y = utils.scale_and_center(Y, scale_params_high, mean_image_high,
                                                        image_type=self.dataset.image_type)
                scale_center_X = np.array(scale_center_X)
                scale_center_Y = np.array(scale_center_Y)
                #print(scale_center_Y.shape)
                # (C, N, H, W) to (N, C, H, W)
                scale_center_X = scale_center_X.reshape(scale_center_X.shape[0], scale_center_X.shape[3],
                                                        scale_center_X.shape[1], scale_center_X.shape[2])
                scale_center_Y = scale_center_Y.reshape(scale_center_Y.shape[0], scale_center_Y.shape[3],
                                                        scale_center_Y.shape[1], scale_center_Y.shape[2])
                #print(scale_center_X.shape)
                #print(scale_center_Y.shape)

                input_tensor, target_tensor = torch.from_numpy(scale_center_X), torch.from_numpy(scale_center_Y)

                if torch.cuda.is_available():
                    #print('CUDA available')
                    #print('current device ' + str(cur_dev))
                    #print('device count ' + str(torch.cuda.device_count()))
                    #print('device name ' + torch.cuda.get_device_name(cur_dev))

                    cur_dev = torch.cuda.current_device()
                    # .cuda() is not in-place; reassign so the tensors actually move to the GPU
                    input_tensor = input_tensor.cuda()
                    target_tensor = target_tensor.cuda()


                self.model.train_model(input_tensor, target_tensor, e)

                ## how to undo the scaling:
                #unscaled_X = utils.scale_and_center_reverse(scale_center_X, scale_params_low, mean_image_low, image_type = self.dataset.image_type)
                #unscaled_Y = utils.scale_and_center_reverse(scale_center_Y, scale_params_high, mean_image_high, image_type=self.dataset.image_type)

    def predict(self):
        #self.model.predict()

        # see self.dataset.X_val and self.dataset.Y_val
        pass

    def log_model(self):
        #self.logger.log(self.model)
        pass
Example no. 33
File: main.py Project: pacant/sgr
#
# Establish a connection with the SNMP agent
#
session = Session(hostname=address, community=community, version=int(version))

#
# Create the database
#
Database.create()

currentTime = time()
while True:
    sleepTime = time() - currentTime

    if sleepTime < interval:
        sleepTime = interval - sleepTime
        sleep(sleepTime)

    currentTime = time()

    values = {}
    values = defaultdict(lambda: "", values)

    for id in oids:
        el = session.get(id)
        values[el.oid] += "%" + el.value

    values = ProcessData.process(values)

    Database.update(values)
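A sketch of the dictionary the polling loop hands to ProcessData.process each cycle, with hypothetical OIDs and readings (each value carries a single '%'-prefixed reading per cycle):
# Hypothetical snapshot of values before ProcessData.process(values):
#   {'1.3.6.1.2.1.2.2.1.10.1': '%1532', '1.3.6.1.2.1.2.2.1.16.1': '%987'}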
Example no. 34
import os
import ProcessAFile
import datetime
import ProcessData

BaseDir='/home/jason/Dropbox/SteffenerColumbia/Scripts/ExperimentalStimuli/PartialTrialDIR/Scripts/PsychoPyTask_Win32/data'
subid = '1233'
Tag = 'tester'
#FileName = 'PartialTrial_62160613001_1_Run2_2016_06_13_1520.csv'

FileName = 'PartialTrial_62160613001_1_TrainOrderFB_2016_06_13_142.csv'
#FileName = 'PartialTrial_62160613001_1_TrainNoFB_2016_06_13_1422.csv'
inFile = os.path.join(BaseDir,FileName)

ProcessAFile.CreateAccSummaryString(inFile)

OutFile = file


df1 = ProcessData.loadBehDataFile(inFile)
# Process the file
df1 = ProcessData.processBehavioralFile(df1)

# Extract accuracy
ACCdata = df1.data.Acc.groupby(df1.data.LoadLevels).mean()

# Split the file name based
from ProcessData import PTdata
import ProcessData
import WriteToGoogleSpreadSheet
from FileSelectClass import Example

def ProcessAFile(inputFile,subid,Tag):
    df1 = ProcessData.loadBehDataFile(inputFile)
    df1 = ProcessData.processBehavioralFile(df1)
    wkB = WriteToGoogleSpreadSheet.openWorkBook()
    wkS = WriteToGoogleSpreadSheet.createWorksheet(wkB, subid)
    ProcessData.writeSummaryBehavioralDataToFile(df1, wkS, Tag)

ex = Example()
# This will ask you to select on or more files
ex.showDialog()
df1 = ProcessData.loadBehDataFile(ex.fileName[0][0])
df1 = ProcessData.processBehavioralFile(df1)
ProcessData.createSPMDesignMatrix(df1, 'P00002001_S0001_Run1_DM')
df2 = ProcessData.loadBehDataFile(ex.fileName[0][1])
df2 = ProcessData.processBehavioralFile(df2)
ProcessData.createSPMDesignMatrix(df2, 'P00002001_S0001_Run2_DM')


wkB = WriteToGoogleSpreadSheet.openWorkBook()
wkS = WriteToGoogleSpreadSheet.createWorksheet(wkB, '2001')
ProcessData.writeSummaryBehavioralDataToFile(df1, wkS, 'Run1')




###########################################################################
Example no. 36
import ProcessAFile
import ProcessData
import WriteToGoogleSpreadSheet
from FileSelectClass import Example

ex = Example()


inFile = ex.showDialog()

subid = '2003'

print inFile
Tag = 'TrNoFB'
col = 8
ProcessAFile.ProcessAFile(inFile[0][0],subid,Tag,col)

df1 = ProcessData.loadBehDataFile(inFile[0][0])
df1.onlyKeepDataRows()
# find load levels
df1.whatLoadIsThisRow()
# check for accuracy
df1.areResponsesCorrect()

df1 = ProcessData.processBehavioralFile(df1)  



Tag = 'Run1'
col = 4
ProcessAFile.ProcessAFile(inFile[0][0],subid,Tag,col)

# Create SPM design matrices
def ProcessAFile(inputFile,subid,Tag):
    df1 = ProcessData.loadBehDataFile(inputFile)
    df1 = ProcessData.processBehavioralFile(df1)
    wkB = WriteToGoogleSpreadSheet.openWorkBook()
    wkS = WriteToGoogleSpreadSheet.createWorksheet(wkB, subid)
    ProcessData.writeSummaryBehavioralDataToFile(df1, wkS, Tag)