Example No. 1
from DataReader import DataReader


class ObjectReader:

    # File extensions whose modification records carry extra
    # level/pointer fields (see readMod below)
    OptionalInts = [
            "w3d",
            "w3a",
            "w3q"
    ]
    variableTypes = []
    def __init__(self, filename):
        self.filename = filename
        self.read = DataReader(filename)
        self.variableTypes = [
            self.read.int,
            self.read.float,
            self.read.float,
            self.read.string
        ]
        
        self.fileVersion = self.read.int()
        self.originalInfo = self.readTable()
        self.customInfo = self.readTable()
        
    def readMod(self):
        modInfo = {}
        modInfo["ID"] = self.read.charArray(4)
        varType = self.read.int()
        if self.filename.split(".")[-1] in self.OptionalInts:
            modInfo["level"] = self.read.int()
            modInfo["pointer"] = self.read.int()
        modInfo["value"] = self.variableTypes[varType]()
        self.read.int()  # trailing end-of-record marker, value unused
        return modInfo
    def readObject(self):
        objectData = {}
        objectData["oldID"] = self.read.charArray(4)
        objectData["newID"] = self.read.charArray(4)
        modCount = self.read.int()
        objectData["mods"] = []
        for i in range(modCount):
            objectData["mods"].append(self.readMod())
        return objectData
    def readTable(self):
        tmpLen = self.read.int()
        tmpInfo = []
        for i in range(tmpLen):
            tmpInfo.append(self.readObject())
        return tmpInfo
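
The ObjectReader above, like the W3E and MPQ readers in Examples No. 11, 32 and 40, leans on a binary DataReader module that is never shown. Below is a minimal sketch of that assumed interface, little-endian unpacking via struct; every method name is inferred from the call sites, not taken from the original module:

import struct

class DataReader:
    def __init__(self, source):
        # accept a filename or an already-open binary file object
        self.file = open(source, "rb") if isinstance(source, str) else source
        self.index = 0  # bytes consumed so far (Example No. 32 reads this)

    def _unpack(self, fmt, size):
        self.index += size
        return struct.unpack("<" + fmt, self.file.read(size))[0]

    def byte(self):  return self._unpack("B", 1)
    def short(self): return self._unpack("h", 2)
    def int(self):   return self._unpack("i", 4)
    def float(self): return self._unpack("f", 4)
    def flags(self): return self._unpack("I", 4)
    def char(self):  return self.charArray(1)

    def charArray(self, n):
        self.index += n
        return self.file.read(n).decode("ascii", errors="replace")

    def string(self):
        # NUL-terminated string
        out = b""
        c = self.file.read(1)
        while c and c != b"\x00":
            out += c
            c = self.file.read(1)
        self.index += len(out) + 1
        return out.decode("utf-8", errors="replace")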
Example No. 2
 def __init__(self): 
     QtGui.QDialog.__init__(self) 
     self.setupUi(self)        
     self.datreader = DataReader()        
     self.Plotter = Plotter()
     self.directory = os.getcwd()
     self.WorkingD_label.setText(self.directory)
     
     self.ShowFile_PB.clicked.connect(self.show_file_start) # shows first lines in the textbrowser
     self.ReadSets_PB.clicked.connect(self.read_set) # reads all files that start with lineEdit and creates a dict in the Sets_Dict[set][file][column]
     self.PlotFile_PB.clicked.connect(self.plotfile)
     self.MAV_slider.valueChanged.connect(self.mav_valuechanged)
     self.MAV_slider.sliderReleased.connect(self.mav_released)
     self.LP_slider.sliderReleased.connect(self.lp)
     self.LP_slider.valueChanged.connect(self.lp_valuechanged)
     self.HP_slider.sliderReleased.connect(self.hp)
     self.HP_slider.valueChanged.connect(self.hp_valuechanged)
     #self.CutZeros.clicked.connect(self.cut_zeros_filedict)
     self.PlotColumn_PB.clicked.connect(self.plotcolumn)
     self.Clear_PB.clicked.connect(self.clear)
     self.Export_PB.clicked.connect(self.export)
     self.FFT_PB.clicked.connect(self.fft)
     self.ReadLabBook.clicked.connect(self.readlabbook)
     self.MAVEdit.returnPressed.connect(self.mav_released)
     self.MVAREdit.returnPressed.connect(self.mvar)
     self.MMMINEdit.returnPressed.connect(self.mmmin)
     self.Corr_PB.clicked.connect(self.correlate)
     self.Select_PB.clicked.connect(self.open_filedialog)  
     self.Pyro_PB.clicked.connect(self.read_pyro)
     self.Log_PB.clicked.connect(self.log_scale)
     
     self.Sets_Dict = dict() # contains [set1][file1][column1] - the data
     self.Files_Dict = dict() # contains [filename 1]: 'set-filename' 
     self.Columns_Dict = dict() # contains[set-filename-column]: same
Example No. 3
 def get_answer_part2():
     result = 0
     char_step = 0
     for paren in DataReader.read_data(1)[0]:
         char_step += 1
         result += 1 if paren == '(' else -1
         if result < 0:
             return char_step
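
Examples No. 3, 4, 5, 7 and 13 all call a static DataReader.read_data that returns the puzzle input as a list of lines. A plausible minimal sketch, assuming a hypothetical one-file-per-day layout:

class DataReader:
    @staticmethod
    def read_data(day):
        # hypothetical path convention: one input file per puzzle day
        with open("data/day{}.txt".format(day)) as f:
            return f.readlines()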
Example No. 4
    def get_answer_part1(self):
        #2572
        position = Position(0, 0)
        houses = {self.get_dict_key_from_position(position): True}
        for move in DataReader.read_data(day=3)[0]:
            position = AdventDayThree.adjust_pos_for_move(position, move)
            houses[self.get_dict_key_from_position(position)] = True

        return len(houses.keys())
Example No. 5
    def answer_part_one():
        total_size = 0
        for wrapping in DataReader.read_data(day=2):
            dimensions = wrapping.replace('\n', '').split('x')
            int_dimensions = [int(dimensions[0]), int(dimensions[1]), int(dimensions[2])]
            int_dimensions = sorted(int_dimensions)

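            # dims are sorted, so 3 * d0*d1 covers the two smallest faces
            # (2 * d0*d1) plus d0*d1 of slack, the area of the smallest side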
            total_size += 3 * (int_dimensions[0] * int_dimensions[1])
            total_size += 2 * (int_dimensions[1] * int_dimensions[2])
            total_size += 2 * (int_dimensions[2] * int_dimensions[0])
        return total_size
Example No. 6
 def __init__(self, filename):
     self.read = DataReader(filename)
     self.variableTypes = [
         self.read.int,
         self.read.float,
         self.read.float,
         self.read.string
     ]
     
     self.fileVersion = self.read.int()
     self.originalInfo = self.readTable()
     self.customInfo = self.readTable()
Example No. 7
    def get_answer_part2(self):
        #2631
        santa = Position(0, 0)
        robot = Position(0, 0)
        houses = {self.get_dict_key_from_position(santa): True}
        move_count = 0
        for move in DataReader.read_data(day=3)[0]:
            move_count += 1
            if move_count % 2:
                santa = self.adjust_pos_for_move(santa, move)
                houses[self.get_dict_key_from_position(santa)] = True
            else:
                robot = self.adjust_pos_for_move(robot, move)
                houses[self.get_dict_key_from_position(robot)] = True

        return len(houses.keys())
Example No. 8
class PerceptronModel:
    class TrainSplit:
        def __init__(self):
            self.train = []
            self.test = []

    def __init__(self):
        self.reader = DataReader()
        self.list_of_entities = []
        self.list_of_klasses = []
        self.num_of_folds = 10

    def readTitles(self):
        self.list_of_entities = self.reader.get_list_of_entities()

    def crossValidationSplits(self):
        splits = []
        for fold in range(0, self.num_of_folds):
            split = self.TrainSplit()
            length = len(self.list_of_entities)
            for i in range(0, length):
                if i % self.num_of_folds == fold:
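                    # note: the matching fold is appended to train here, which
                    # inverts the usual k-fold convention (held-out fold = test)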
                    split.train.append(self.list_of_entities[i])
                else:
                    split.test.append(self.list_of_entities[i])
            splits.append(split)
        return splits

    def classify(self, entity):
        pass

    def train(self, split):
        pass

    def test(self, split):
        pass
Example No. 9
 def __init__(self):
   self.DR = DataReader()
   self.Features = RuleBasedFeatures()
   self.feats = ["cc", "ck", "bk", "pk", "hk", "oe", "3", "5", "6", "x", 'nword', 'hood', 'bCaret', 'cCaret', 'pCaret', 'hCaret']
   self.gang = {}
Example No. 10
use_cuda = torch.cuda.is_available()
# use_cuda = False

data_path               = "../ivd_data/preprocessed.h5"
indicies_path           = "../ivd_data/indices.json"
images_features_path    = "../ivd_data/image_features.h5"
crop_features_path      = "../ivd_data/image_features_crops.h5" 

ts                      = str(datetime.datetime.fromtimestamp(time()).strftime('%Y_%m_%d_%H_%M'))
output_file             = "logs/deciderguesser_output" + ts + ".log"
loss_file               = "logs/decider_guesser_loss" + ts + ".log"
hyperparameters_file    = "logs/deciderguesser_hyperparameters" + ts + ".log"


dr = DataReader(data_path=data_path, indicies_path=indicies_path, images_features_path=images_features_path, crop_features_path = crop_features_path)

### Hyperparameters
my_sys                  = getpass.getuser() == 'nabi'
length                  = 11
logging                 = my_sys
save_models             = my_sys

# OpenNMT Parameters
opt = argparse.Namespace()
opt.batch_size          = 1
opt.beam_size           = 5
opt.gpu                 = 0
opt.max_sent_length     = 100
opt.replace_unk         = True
opt.tgt                 = None
Example No. 11
class ReadW3E():
    def __init__(self, filename):
        self.read = DataReader(filename)
        self.mapInfo = self.ReadMap()
        
    def ReadMap(self):
        mapInfo = self.ReadHeader()
        mapInfo["info"] = []
        for i in range(mapInfo["width"] * mapInfo["height"]):
            mapInfo["info"].append(self.ReadTile())
        return mapInfo
    def ReadHeader(self):
        data = {}
        data["fileID"] = self.read.charArray(4)
        data["formatVersion"] = self.read.int()
        
        data["mainTileSet"] = self.read.char()        
        data["customTileSet"] = self.read.int() #actually is a boolean
        
        data["groundTileSets"] = self.ReadTileset()
        data["cliffTileSets"] = self.ReadTileset()
        
        data["width"] = self.read.int()
        data["height"] = self.read.int()
        
        data["offsetX"] = self.read.float()
        data["offsetY"] = self.read.float()
        return data
    def ReadTileset(self):
        length = self.read.int()
        info = [] 
        for i in range(0,length):
            info.append(self.read.charArray(4))
        return info
    def ReadTile(self):
        tmpData = {}
        tmpData["groundHeight"] = self.read.short()
        tmpData["waterLevel"] = self.read.short() #bit 15 is used for boundary flag 1
        tmpData["nibble1"] = self.read.byte()
        tmpData["textureDetails"] = self.read.byte()
        tmpData["nibble2"] = self.read.byte()
        
        return tmpData
Example No. 12
    test_set_num = 25
    train_set_num = 660
    valid_set_num = 7

    path = [r'.\data\A', r'.\data\B', r'.\data\C']

    x_train = np.zeros((1, sample_size, 2))
    x_test = np.zeros((1, sample_size, 2))
    x_valid = np.zeros((1, sample_size, 2))
    y_train = np.zeros((1))
    y_test = np.zeros((1))
    y_valid = np.zeros((1))

    data = [x_train, x_test, x_valid, y_train, y_test, y_valid]

    data_reader = DataReader(data_augment_stride=data_augment_stride,
                             train_set_num=train_set_num)
    for i in range(len(path)):
        data_sub = data_reader.read_data(path[i])
        for j in range(len(data)):
            data[j] = np.append(data[j], data_sub[j], axis=0)

    for i in range(len(data)):
        data[i] = data[i][1:]
    # train set A, test set B
    x_train, x_test, x_valid, y_train, y_test, y_valid = different_load_set(
        data, 'B', 'A')
    # for t_SNE
    y_label = y_test

    y_train = to_categorical(y_train, num_classes=10)
    y_test = to_categorical(y_test, num_classes=10)
Example No. 13
    def get_answer_part1():
        result = 0
        for paren in DataReader.read_data(1)[0]:
            result += 1 if paren == '(' else -1

        return result
Example No. 14
@author: haotianteng
"""

from DataReader import DataReader

############################# Hyperparameters
TRAINING_READS = 30000  # Total reads used
TRAIN_WEIGHT = 0.4  # Proportion of reads used for training
TEST_WEIGHT = 0.4  # Proportion of reads used for testing
VALID_WEIGHT = 0.2  # Proportion of reads used for validation

#Structure
EVENT_LENGTH = 20  #Length of each sentence
HIDDEN_UNIT_NUM = 24  #Length of the hidden state of each hidden layer
CellLayer = 3  #Number of the hidden layers

#Training
STEP_RATE = 0.5
BATCH_SIZE = 20
EPOCH = 5000
#############################

############################### Read the data
data = DataReader(
    TRAIN_WEIGHT,
    TEST_WEIGHT,
    VALID_WEIGHT,
    EVENT_LENGTH,
    TRAINING_READS,
    event_total=10000,
    file_list='/home/haotianteng/UQ/BINF7000/Nanopore/GN_003/event_pass.dat')
Example No. 15
Created on Oct 25, 2016

@author: Iegor
'''
from GraphAnimator import GraphAnimator
from DataReader import DataReader
from GreedySolver import solve_tsp
import numpy as np


def make_dist_matrix(points):
    """Creates a distance matrix for the given coordinate vectors."""
    x = []
    y = []
    for point in points:
        x.append(point[0])
        y.append(point[1])
    N = len(x)
    xx = np.vstack((x, ) * N)
    yy = np.vstack((y, ) * N)
    return np.sqrt((xx - xx.T)**2 + (yy - yy.T)**2)
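

# An equivalent broadcasting formulation avoids stacking N copies of the
# coordinate vectors; a sketch (the function name is mine, not the original's):
def make_dist_matrix_broadcast(points):
    # pairwise Euclidean distances via broadcasting
    pts = np.asarray(points, dtype=float)
    diff = pts[:, None, :] - pts[None, :, :]  # shape (N, N, 2)
    return np.sqrt((diff ** 2).sum(axis=-1))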


if __name__ == "__main__":
    d = DataReader('../../../capitals.txt')
    points = d.readPoints()
    matrix = make_dist_matrix(points)
    path = solve_tsp(matrix)
    g = GraphAnimator(points=points, pathes=path)
    g.beginAnimation()
Example No. 16
import os
import numpy as np
from sklearn.linear_model import LogisticRegression
from DataReader import DataReader

data_reader = DataReader("./data/us_trial.text", "./data/us_trial.labels")
X, Y = data_reader.get_features()

len_X = len(X)
len_X = len_X // 10  # use a tenth of the data
len_train = int(0.8 * len_X)
print(len_train)
trainX, trainY = X[0:len_train], Y[0:len_train]
trainX, trainY = np.asarray(trainX), np.asarray(trainY)

testX, testY = X[len_train:len_X], Y[len_train:len_X]
testX, testY = np.asarray(testX), np.asarray(testY)
print(len(testX))


if __name__ == '__main__':
    clf = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial')
    clf.fit(trainX, trainY)

    counter, len_testX = 0, len(testX)
    for i in range(len_testX):
        output = clf.predict(np.asarray([testX[i]]))
        if output == testY[i]:
            counter += 1
    accuracy = counter / len_testX * 100
    print("Accuracy: {:.2f}%".format(accuracy))
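
The per-sample prediction loop above can be collapsed into one vectorized call; a sketch using sklearn.metrics.accuracy_score with the same variable names:

from sklearn.metrics import accuracy_score

predictions = clf.predict(testX)  # predict the whole test set at once
accuracy = accuracy_score(testY, predictions) * 100
print("Accuracy: {:.2f}%".format(accuracy))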
Example No. 17
def gen_dataset(seed):
    random_seed = seed
    data_reader = DataReader()
    return DataObject(data_reader, 1, random_seed=random_seed)
Example No. 18
#            csvwriter.writerow(row)
#            left[int(row[0])] -= 1

#input("Done")

connection_type = Connection_JIT(0, 0, 0)
kv_ty = (numba.types.containers.UniTuple(numba.int64,
                                         2), numba.typeof(connection_type))
master_connection_dict = numba.typed.Dict.empty(*kv_ty)
innov_list = numba.typed.List()
innov_list.append(1)

print("Begin test...")
pr = Predictor()
p = Population(784, 47, 1, master_connection_dict, innov_list, 0)
dr = DataReader()
mapping = dr.get_mapping()
images = dr.get_images(
    112800, 28, 28)  # 112800 images in data set, each image 28x28 pixels
mp = p.networks[0]
for q in range(200):
    print("Adding connection " + str(q + 1), end='\r')
    mp.add_connection()

#mp.randomize_all_bias()

print("\nStarting predictions...")
engines = [[mp, 0, 0]]

pr.make_predictions(engines, mapping, images, 1)
base = engines[0][2]
Example No. 19
from DataReader import DataReader
from Preprocessor import Preprocessor
from Vectorizer import Vectorizer
from Classifier import Classifier
from DeepLearning import DeepLearner
from sklearn.model_selection import train_test_split as split
import numpy as np

sub_b=['UNT','TIN']

dr_tr = DataReader('./datasets/training-v1/offenseval-training-v1.tsv','B')
tr_data,tr_labels = dr_tr.get_labelled_data()
tr_data,tr_labels = dr_tr.upsample(tr_data,tr_labels,label=0)
tr_data,tr_labels = dr_tr.shuffle(tr_data,tr_labels,'random')

dr_tst = DataReader('./datasets/test-B/testset-taskb.tsv')
tst_data,tst_ids = dr_tst.get_test_data()

tr_data = tr_data[:500]
tr_labels = tr_labels[:500]

##### Naive Bayes - Lemmatize - tfidf
prp = Preprocessor('remove_stopwords')
tr_data_clean = prp.clean(tr_data)
tst_data_clean = prp.clean(tst_data)

vct = Vectorizer('tfidf')
tr_vectors = vct.vectorize(tr_data_clean)
tst_vectors = vct.vectorize(tst_data_clean)

clf = Classifier('M-NaiveBayes')
Example No. 20
 def extractor(self):
     #array in which to store people list
     people_list = []
     #initialization of prev_person
     prev_person = ""
     #for each person
     for i in range(0, len(self.data_list)):
         #identification of current person
         curr_person = self.data_list[i].split("_")[0]
         #if a new person is found
         if curr_person != prev_person:
             #if a previous person exists
             if prev_person != "":
                 #function to calculate features about a person
                 fc = FeaturesCalculator(self.strides_info)
                 #identification of gender: 0 for male, 1 for female
                 if prev_person[0] == 'M':
                     features = fc.calculator(0)
                 elif prev_person[0] == 'F':
                     features = fc.calculator(1)
                 else:
                     features = fc.calculator(-1)
                 #function to store features about a person
                 fs = FeaturesStorage(prev_person, features)
                 fs.storage()
                 #adding prev_person to the list
                 people_list.append(prev_person)
                 #to free array with strides data
                 self.strides_info = []
             #updating prev_person
             prev_person = curr_person
             #list of strides about current person
             strides_list = os.listdir(self.strides_dir + self.data_list[i])
             for j in range(0, len(strides_list)):
                 #reading data of the selected stride
                 dr = DataReader(self.strides_dir + self.data_list[i] +
                                 '/' + strides_list[j] + '/kalman/txt/')
                 data, time = dr.reader()
                 #adding stride data to strides_info array
                 self.strides_info.append([time, data])
         else:
             #list of strides about current person
             strides_list = os.listdir(self.strides_dir + self.data_list[i])
             for j in range(0, len(strides_list)):
                 #reading data of the selected stride
                 dr = DataReader(self.strides_dir + self.data_list[i] +
                                 '/' + strides_list[j] + '/kalman/txt/')
                 data, time = dr.reader()
                 #adding stride data to strides_info array
                 self.strides_info.append([time, data])
             #if the last folder has been reached
             if i == len(self.data_list) - 1:
                 #function to calculate features about a person
                 fc = FeaturesCalculator(self.strides_info)
                 #identification of gender: 0 for male, 1 for female
                 if prev_person[0] == 'M':
                     features = fc.calculator(0)
                 elif prev_person[0] == 'F':
                     features = fc.calculator(1)
                 else:
                     features = fc.calculator(-1)
                 #function to store features about a person
                 fs = FeaturesStorage(prev_person, features)
                 fs.storage()
                 #adding prev_person to the list
                 people_list.append(prev_person)
     return people_list
Example No. 21
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from HyperParameters import HyperParameters
from DataReader import DataReader
from NeuralNet import NeuralNet


def draw(reader, net):
    plt.plot(reader.XTrain, reader.YTrain)
    plt.show()


if __name__ == "__main__":
    reader = DataReader()
    reader.ReadData()

    reader.NormalizeX()
    reader.NormalizeY()

    hp = HyperParameters(13,
                         1,
                         eta=0.001,
                         max_epoch=2000,
                         batch_size=50,
                         eps=1e-5)
    net = NeuralNet(hp)
    net.train(reader, checkpoint=0.2)
    print("W=", net.weight)
    print("B=", net.bias)
Example No. 22
        # lazy way for the cache decorator
        @memoize
        def _wordBreak(s):
            # print(s)
            results = []
            for word in dict:
                if s == word:
                    results.append(word)
                elif s.startswith(word):
                    # print('got', word)
                    for result in _wordBreak(s[len(word):]):
                        results.append(word + ' ' + result)

            return results

        return _wordBreak(s)


def weight(s):
    weight = 0
    for word in s:
        weight += 10**len(word)
    return weight


if __name__ == "__main__":
    d = DataReader('../../../dict_en.txt')
    tokens = d.readTokens()
    wraps = Solution().wordBreak("thisisatest", tokens)
    print(sorted(wraps, key=weight))
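
The @memoize decorator used by _wordBreak is not shown; a minimal sketch of one plausible implementation (functools.lru_cache(maxsize=None) would serve the same purpose for hashable arguments):

import functools

def memoize(func):
    # cache results keyed by the positional-argument tuple
    cache = {}
    @functools.wraps(func)
    def wrapper(*args):
        if args not in cache:
            cache[args] = func(*args)
        return cache[args]
    return wrapper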
Example No. 23
def trainModel(experiment, testRun, setTarg):
    print("Training model ...")

    datasetTrain = DataReader(experiment.data["path"])
    datasetTrain.setDatasetClassic("train", experiment.data["feature"],
                                   experiment.data["annotation"])
    if setTarg == "MeanStd": datasetTrain.setTargetMeanStd()
    if testRun: datasetTrain = keepOne(datasetTrain)
    datasetDev = DataReader(experiment.data["path"])
    datasetDev.setDatasetClassic("dev", experiment.data["feature"],
                                 experiment.data["annotation"])
    if setTarg == "MeanStd": datasetDev.setTargetMeanStd()
    if testRun: datasetDev = keepOne(datasetDev)
    if testRun: experiment.maxEpoch = 1
    inp, tar = datasetDev[0]
    experiment.inputDim = inp.shape[1]
    experiment.outputDim = tar.shape[1]
    # print("experiment.outputDim", tar.shape)
    wrapper = getWrapper(experiment)
    wrapper.trainModel(datasetTrain,
                       datasetDev,
                       batchSize=experiment.batchSize,
                       maxEpoch=experiment.maxEpoch,
                       loadBefore=True,
                       tolerance=experiment.tolerance,
                       minForTolerance=experiment.minForTolerance)
    wrapper.saveLogToCSV()
Example No. 24
 def __init__(self, file_path):
     self.model = models.load_model(file_path)
     self.data_reader = DataReader()
     self.nn_helper_funcs = NNHelperFunctions()
     self.history = []
Example No. 25
    for item_count in column_counts:
        p = item_count / total
        probabilities.append(p)
    # print("probs", probabilities)

    column_entropy = 0
    for p_i in probabilities:
        column_entropy += entropy(p_i)
    # print("column_entropy", column_entropy)

    return column_entropy


#accessing training data and setting up data frame
train_data_url = 'https://raw.githubusercontent.com/jeniyat/CSE-5521-SP21/master/HW/HW1/Data/train.csv'
dr = DataReader(train_data_url)
dr.read()

df = pd.DataFrame(dr.data)

#CALCULATING ENTROPY OF PREDICTION COLUMN

#getting edible column's counts of values
edible_col = df[0].value_counts().to_list()

probabilities = []

#get total count for current column
total = 0
for column in edible_col:
    total += column
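
The entropy helper summed per probability above is not shown; presumably it computes a single term of Shannon entropy, along the lines of:

from math import log2

def entropy(p):
    # one term of Shannon entropy; assumed form of the missing helper
    return -p * log2(p) if p > 0 else 0.0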
Example No. 26
 def __init__(self):
     self.reader = DataReader()
     self.list_of_entities = []
     self.list_of_klasses = []
     self.num_of_folds = 10
Example No. 27
# tf.compat.v1.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend as K
#session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
#sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
#K.set_session(sess)
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

from DataReader import DataReader
from NN_Manager import NN_Manager
#Get the dataSets
#tf.random.set_seed(13)
dataReader = DataReader()

(trainingDataSet,testingDataSet) = dataReader.GetDataSets()
(trainingLabels,testingLabels) = dataReader.GetLabels()

#Create chosen networks
networkManager = NN_Manager(trainingDataSet,testingDataSet,trainingLabels,testingLabels)
#networkManager.addNetwork("CNN")
#networkManager.addNetwork("LargerFilterCNN")
#networkManager.addNetwork("LargeCNN")
networkManager.addNetwork("StackedLSTM")
#networkManager.addNetwork("LSTM")
#networkManager.addNetwork("FFNN")

#networkManager.addNetwork("GRU")
#networkManager.addNetwork("StackedLSTM")
Example No. 28
from DataReader import DataReader
import numpy as np
import pickle
import os
from time import time

data_path = '../ivd_data/preprocessed.h5'
indicies_path = '../ivd_data/indices.json'

dr = DataReader(data_path=data_path, indicies_path=indicies_path)

# source and target files
src_train = 'data/gw_src_train'
src_valid = 'data/gw_src_valid'
tgt_train = 'data/gw_tgt_train'
tgt_valid = 'data/gw_tgt_valid'

# parameters
length = 15
train_val_ratio = 0.1
n_games_to_train = '_ALL'
sos_token_string = '-SOS-'

# get all games
game_ids = dr.get_game_ids()


# prune games
def get_game_ids_with_max_length(length):
    """ return all game ids where all questions are smaller then the given length """
Example No. 29
# initial_population_size: number of chromosomes in the initial population
# nb_generations: number of generations to produce (each includes tournament selection, crossover and mutation)
# ratio_cross: ratio of the total population to be crossed over and mutated
# prob_mutate: mutation probability
# k: number of participants in the selection tournaments
initial_population_size, nb_generations, ratio_cross, prob_mutate, k = 200, 100, 0.8, 0.05, 2

# How many times to run the whole algorithm: 10 is enough for mini and small instances, 100 for big ones
problem_instances = 10
DATA_PATH = "data/data_transportationPb_mini.xlsx"


if __name__ == "__main__":

    # reading the data from the excel file
    dataReader = DataReader(DATA_PATH)
    data = dataReader.read_data()

    data["nb_vehicles"] = 9

    # creating the list of vehicle names
    vehicles = ['vehicle' + str(i) for i in range(0, int(data["nb_vehicles"]))]
    stations = list(data["all_stations"])
    distances = data["distances"]
    mandatory_trips = data["trips"]
    best_results = []

    print("EXECUTING ", problem_instances, " INSTANCES ")
    genetic_problem = GeneticProblem(vehicles, stations, distances, mandatory_trips)
    generation_factory = GenerationFactory(genetic_problem)
    t0 = time()
Example No. 30
    return model


def save_model(model_save_dir, model_name, model):
    save_path = '{}/{}'.format(model_save_dir, model_name)
    model.save(save_path)


if __name__ == '__main__':
    source_path = 'data'
    model_save_dir = 'models/'

    img_width, img_height = 200, 200
    model = create_vgg16_model(img_width, img_height)

    data_reader = DataReader(source_path)
    train_images, train_labels = data_reader.get_train_data()
    val_images, val_labels = data_reader.get_val_data()
    test_images, test_labels = data_reader.get_test_data()
    np.set_printoptions(suppress=True)

    history = model.fit(train_images,
                        train_labels,
                        validation_data=(val_images, val_labels),
                        batch_size=16,
                        epochs=10,
                        verbose=1)

    predictions = model.predict(test_images)

    for i in range(len(predictions)):
Example No. 31
from DataReader import DataReader

x = DataReader('sign-ins.csv')

x.createPlot(x.createDatesList(), x.createCountList())

y = DataReader('sign-ins2.csv')

y.createPlot(y.createDatesList(), y.createCountList())
Example No. 32
    def read_header(self):
        """Read the header of a MPQ archive."""

        def read_mpq_header(offset=None):
            if offset:
                self.file.seek(offset)
            data = self.file.read(32)
            header = MPQFileHeader._make(
                struct.unpack(MPQFileHeader.struct_format, data))
            header = header._asdict()
            
            if header['format_version'] == 1:
                
                data = self.file.read(12)
                extended_header = MPQFileHeaderExt._make(
                    struct.unpack(MPQFileHeaderExt.struct_format, data))
                header.update(extended_header._asdict())
            return header

        def read_mpq_user_data_header():
            data = self.file.read(16)
            header = MPQUserDataHeader._make(
                struct.unpack(MPQUserDataHeader.struct_format, data))
            header = header._asdict()
            header['content'] = self.file.read(header['user_data_header_size'])
            return header

        magic = self.file.read(4)
        self.file.seek(0)

        print(magic)

        header = {}
        if magic == b"HM3W":
            datReader = DataReader(self.file)
            ## should be HM3W
            header["wc3map_magic"] = datReader.charArray(4)
            
            ## unknown
            datReader.int()
            header["wc3map_mapName"] = datReader.string()
            """
            0x0001: 1=hide minimap in preview screens
            0x0002: 1=modify ally priorities
            0x0004: 1=melee map
            0x0008: 1=playable map size was large and has never been reduced to medium
            0x0010: 1=masked area are partially visible
            0x0020: 1=fixed player setting for custom forces
            0x0040: 1=use custom forces
            0x0080: 1=use custom techtree
            0x0100: 1=use custom abilities
            0x0200: 1=use custom upgrades
            0x0400: 1=map properties menu opened at least once since map creation
            0x0800: 1=show water waves on cliff shores
            0x1000: 1=show water waves on rolling shores
            """
            header["wc3map_mapFlags"] = datReader.flags()
            header["wc3map_maxPlayers"] = datReader.int()
            self.file.read(512 - datReader.index)
            print ("Now position:", self.file.tell())
        else:
            ## If the magic isn't HM3W, we will skip the first 512 bytes of the 
            ## file anyway 
            self.file.seek(512)
            
        print(self.file.tell())
        magic = self.file.read(4)
        self.file.seek(512)
        print(len(magic))
        print(magic, hex(ord(magic[3])))
        
        if magic == b'MPQ\x1a':
            header.update(read_mpq_header())
            header['offset'] = 512
        elif magic == b'MPQ\x1b':
            user_data_header = read_mpq_user_data_header()
            header.update(read_mpq_header(user_data_header['mpq_header_offset']))
            header['offset'] = user_data_header['mpq_header_offset']
            header['user_data_header'] = user_data_header
            
        else:
            raise ValueError("Invalid file header.")

        return header
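
The wc3map_mapFlags docstring above documents a bitfield; a small sketch for decoding it into booleans (the flag subset and names are illustrative, the bit values come from the comment block):

WC3_MAP_FLAGS = {
    0x0001: "hide_minimap_in_preview",
    0x0004: "melee_map",
    0x0040: "use_custom_forces",
    0x0080: "use_custom_techtree",
}

def decode_map_flags(flags):
    return {name: bool(flags & bit) for bit, name in WC3_MAP_FLAGS.items()}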
Example No. 33
pred_directory = os.path.join(directory, 'Pred')

# make directory for storing predictions if it does not exist
if not os.path.isdir(pred_directory):
  os.makedirs(pred_directory)

# choose network, can be either DRN18 or DRN26
network = 'DRN26'
# set parameters
batch_size=8
num_epochs=100
use_weights = 1
num_classes = 5
image_dims=[500,500,3]

data = DataReader(directory, batch_size, num_epochs, use_weights=0)
dataset = data.test_batch(data_file)
num_images = data.num_images

# get image filenames
image_list = data.image_list

# determine number of iterations based on number of images
num_iterations = int(np.floor(num_images/batch_size))

# create iterator allowing us to switch between datasets
data_iterator = dataset.make_one_shot_iterator()
next_element = data_iterator.get_next()

# create placeholder for train or test
train_network = tf.placeholder(tf.bool, [])
Example No. 34
class Printer:
  def __init__(self):
    self.DR = DataReader()
    self.Features = RuleBasedFeatures()
    self.feats = ["cc", "ck", "bk", "pk", "hk", "oe", "3", "5", "6", "x", 'nword', 'hood', 'bCaret', 'cCaret', 'pCaret', 'hCaret']
    self.gang = {}
  
  def loadData(self, postsFile):
    self.DR.loadData(postsFile)
  
  def calculateFeatures(self, posts):
    Hits = dd(int)
    Scopes = dd(int)
    numWordsScope = dd(int)
    for post in posts:
      numWords = len(self.DR.posts[post][4].split())
      postHits, postScopes = self.Features.scorePostWordIndexing(self.DR.posts[post][4])
      #print postHits, postScopes
      for feat in self.feats:
        Scopes[feat] += len(postScopes[feat])
        Hits[feat + 'Count'] += len(postHits[feat + 'Count'])
        numWordsScope[feat] += numWords
    #print Hits, Scopes
    #simpleGlobal = self.globalScoreSimple(Hits, Scopes)
    #complexityGlobal = self.globalScoreComplexity(Hits, Scopes)
    return Hits, Scopes, numWordsScope
  
  def globalScoreComplexity(self, counts, scopeDict):
    scopeIndices = set()
    for feat in scopeDict.iterkeys():
      for index in scopeDict[feat]:
        scopeIndices.add(index)
    count = 0
    for feat in counts.iterkeys():
      count += len(counts[feat])
    if len(scopeIndices) > 0:
      return str(round(count * 100.0 / len(scopeIndices), 2))
    return ""
  
  def globalScoreSimple(self, counts, scopeDict):
    scope = 0
    for feat in scopeDict.iterkeys():
      scope += len(scopeDict[feat])
    count = 0
    for feat in counts.iterkeys():
      count += len(counts[feat])
    if scope > 0:
      return str(round(count * 100.0 / scope, 2))
    return ""
  
  def printFeats(self, users, outFile):
    outFile = open(outFile, 'w', 1)
    for user in users:
      Hits, Scopes, numWordsScope = self.calculateFeatures(self.DR.userwisePosts[user])
      feats = []
      for feat in self.feats:
        try:
          feats.append(str(round(Hits[feat+'Count'] * 100.0 / Scopes[feat], 2)))
        except ZeroDivisionError:
          feats.append('-1')
      outFile.write(user + ',' + ','.join(feats) + ',' + self.gang[user] + '\n')
    outFile.close()
  
  def loadGangAnnotation(self, gangAnnotation):
    for line in open(gangAnnotation):
      line = line.strip().split('\t')
      self.gang[line[0]] = line[1]
Example No. 35
 def _load_data(self):
     self.raw = DataReader(day=7).as_raw()
Example No. 36
def Train():
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.train.get_or_create_global_step()
        inc_step = tf.assign_add(global_step, 1)

        reader = DataReader(FLAGS.input_training_data,
                            FLAGS.buffer_size,
                            FLAGS.batch_size,
                            FLAGS.traing_epochs,
                            is_shuffle=True)
        model = LRModel()
        trainer = Trainer(model, inc_step, reader)

        summary_op = tf.summary.merge_all()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True

        saver = tf.train.Saver(max_to_keep=FLAGS.max_model_to_keep,
                               name='model_saver')

        with tf.Session(config=config) as session:
            summ_writer = tf.summary.FileWriter(FLAGS.log_dir, session.graph)
            # initialize variables (a saved checkpoint, if any, is restored below)
            session.run(tf.local_variables_initializer())
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())
            session.run(reader.iterator.initializer)

            #zero = tf.constant(0, dtype=tf.float32)

            ckpt = tf.train.get_checkpoint_state(FLAGS.output_model_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)
                print("Load model from ", ckpt.model_checkpoint_path)
            else:
                print("No initial model found.")

            trainer.start_time = time.time()
            while True:
                try:
                    _, avg_loss, total_weight, step, summary = session.run(
                        trainer.train_ops() + [summary_op])

                    #where = tf.not_equal(model.W, zero)
                    #indices = tf.where(where)
                    #print(session.run(indices))
                    #print(session.run(tf.gather(model.W, indices)))
                    #print(session.run(model.b))
                    #print(step)
                    if step % FLAGS.log_frequency == 1:
                        summ_writer.add_summary(summary, step)
                        trainer.print_log(total_weight, step, avg_loss)
                    if step % FLAGS.checkpoint_frequency == 1:
                        saver.save(session,
                                   FLAGS.output_model_path + "/model",
                                   global_step=step)
                except tf.errors.OutOfRangeError:
                    print("End of training.")
                    break
    pass
Example No. 37
                  format(network_type, num_hidden_units, num_layers,  num_epochs, embed_vector_size, window_size, min_count))

    #mode = 'train'
    mode = 'evaluate'
    #mode = 'score'
    K.clear_session()
    with K.get_session() as sess:
        K.set_session(sess)
        graphr = K.get_session().graph
        with graphr.as_default():

            if mode == 'train':
                print("Training the model... num_epochs = {}, num_layers = {}".
                      format(num_epochs, num_layers))

                reader = DataReader(num_classes, vector_size=embed_vector_size)
                entityExtractor = EntityExtractor(reader,
                                                  embedding_pickle_file)
                entityExtractor.train(local_train_file_path,
                                      network_type=network_type,
                                      num_epochs=num_epochs,
                                      num_hidden_units=num_hidden_units,
                                      num_layers=num_layers)
                entityExtractor.save_tag_map(tag_to_idx_map_file)

                #Save the model
                entityExtractor.save(model_file_path)
            elif mode == 'evaluate':
                # Evaluate the model
                print("Evaluating the model...")
import scipy.sparse as sps

from Base.NonPersonalizedRecommender import TopPop, Random, GlobalEffects

from KNN.UserKNNCFRecommender import UserKNNCFRecommender
from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from KNN.ItemKNNCBFRecommender import ItemKNNCBFRecommender
from GraphBased.RP3betaRecommender import RP3betaRecommender
from GraphBased.P3alphaRecommender import P3alphaRecommender

from Data_manager.RecSys2019.RecSys2019Reader import RecSys2019Reader
from Data_manager.DataSplitter_leave_k_out import DataSplitter_leave_k_out

if __name__ == '__main__':
    for seed in [0, 1, 2, 3, 4]:
        data_reader = DataReader()
        data = DataObject(data_reader, 1, random_seed=seed)
        recommender = RP3betaRecommender(data.urm_train)
        recommender.fit(topK=10, alpha=0.27, beta=0.11)
        LogToFileEvaluator.evaluate(data,
                                    seed,
                                    recommender,
                                    "RP3",
                                    "",
                                    filename="algo_eval.csv")

    # for seed in [0, 1, 2, 3, 4]:
    #     data_reader = DataReader()
    #     data = DataObject(data_reader, 1, random_seed=seed)
    #     recommender = ItemKNNCFRecommender(data.urm_train)
    #     recommender.fit(topK=22, shrink=850, similarity="jaccard", feature_weighting="BM25")
Example No. 39
class MeinDialog(QtGui.QDialog, Dlg): 
    def __init__(self): 
        QtGui.QDialog.__init__(self) 
        self.setupUi(self)        
        self.datreader = DataReader()        
        self.Plotter = Plotter()
        self.directory = os.getcwd()
        self.WorkingD_label.setText(self.directory)
        
        self.ShowFile_PB.clicked.connect(self.show_file_start) # shows first lines in the textbrowser
        self.ReadSets_PB.clicked.connect(self.read_set) # reads all files that start with lineEdit and creates a dict in the Sets_Dict[set][file][column]
        self.PlotFile_PB.clicked.connect(self.plotfile)
        self.MAV_slider.valueChanged.connect(self.mav_valuechanged)
        self.MAV_slider.sliderReleased.connect(self.mav_released)
        self.LP_slider.sliderReleased.connect(self.lp)
        self.LP_slider.valueChanged.connect(self.lp_valuechanged)
        self.HP_slider.sliderReleased.connect(self.hp)
        self.HP_slider.valueChanged.connect(self.hp_valuechanged)
        #self.CutZeros.clicked.connect(self.cut_zeros_filedict)
        self.PlotColumn_PB.clicked.connect(self.plotcolumn)
        self.Clear_PB.clicked.connect(self.clear)
        self.Export_PB.clicked.connect(self.export)
        self.FFT_PB.clicked.connect(self.fft)
        self.ReadLabBook.clicked.connect(self.readlabbook)
        self.MAVEdit.returnPressed.connect(self.mav_released)
        self.MVAREdit.returnPressed.connect(self.mvar)
        self.MMMINEdit.returnPressed.connect(self.mmmin)
        self.Corr_PB.clicked.connect(self.correlate)
        self.Select_PB.clicked.connect(self.open_filedialog)  
        self.Pyro_PB.clicked.connect(self.read_pyro)
        self.Log_PB.clicked.connect(self.log_scale)
        
        self.Sets_Dict = dict() # contains [set1][file1][column1] - the data
        self.Files_Dict = dict() # contains [filename 1]: 'set-filename' 
        self.Columns_Dict = dict() # contains[set-filename-column]: same
        
    def log_scale(self):
        print 'log_scale'
        f = self.Plotter.figure_list[int(self.CurrentFigureEdit.text())]
        ax = f.axes[int(self.ax_scale_edit.text())]
        if ax.get_yscale() == 'linear':
            ax.set_yscale('log')
            print 'log'
        else:
            ax.set_yscale('linear')
        plt.title('')
                
    def read_pyro(self):
        print 'read_pyro'
        filelist = list()
        filelist = [f for f in os.listdir(self.directory) if f.startswith(self.lineEdit.text())]
        print filelist
        filelist = [os.path.join(self.directory, f) for f in filelist]
        cols_of_interest = [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')]
        print cols_of_interest
        self.Sets_Dict[str(self.lineEdit.text())] = self.datreader.read_pyro(filelist, cols_of_interest)
        #self.cut_zeros_filedict()
        self.update_SetScroll()
        self.update_Files_Dict() 
        self.update_FileScroll()
        self.update_Columns_Dict()
        self.update_ColumnScroll()
        print self.Sets_Dict.keys()
        
        
    def open_filedialog(self):
        files = str(QtGui.QFileDialog.getOpenFileName(None, QtCore.QString('Select File'), QtCore.QString(os.getcwd()),QtCore.QString('*.txt')))
        print files
        self.lineEdit.setText(os.path.basename(files))
        self.WorkingD_label.setText(os.path.dirname(files))
        self.directory = os.path.dirname(files)
        
    def correlate(self):
        fnum = self.Plotter.plot_eval(self.Plotter.correlate, 0, int(self.CurrentFigureEdit.text()), self.InActiveFigure.isChecked(), self.SelectedRange.isChecked(), self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))
        
    def readlabbook(self):
        #self.HistoryEdit.insertPlainText(self.HistoryEdit.text())
        print 'read labbook'
        lab_dict = self.datreader.get_labdict(str(self.lineEdit.text()))
        filelist = lab_dict.keys()
        print filelist
        path = self.directory
        filelist = [os.path.join(path, f) for f in filelist if f in os.listdir(path)]
        print filelist
        cols_of_interest = [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')]
        print cols_of_interest
        self.Sets_Dict[str(self.lineEdit.text())] = self.datreader.read_files(filelist, cols_of_interest)
        #self.cut_zeros_filedict()
        lab_dict = self.datreader.get_labdict(str(self.lineEdit.text()))
        for f in self.Sets_Dict[str(self.lineEdit.text())].keys():
            for info in lab_dict[f].keys():
                self.Sets_Dict[str(self.lineEdit.text())][f][info] = lab_dict[f][info]
        self.update_SetScroll()
        self.update_Files_Dict() 
        self.update_FileScroll()
        self.update_Columns_Dict()
        self.update_ColumnScroll()
        print self.Sets_Dict.keys()
    
    def mvar(self):
        fnum = self.Plotter.plot_eval(self.Plotter.mvar, int(self.MVAREdit.text()), int(self.CurrentFigureEdit.text()), self.InActiveFigure.isChecked(), self.SelectedRange.isChecked(), self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))
        
    def mmmin(self):
        fnum = self.Plotter.plot_eval(self.Plotter.mmmin, int(self.MMMINEdit.text()), int(self.CurrentFigureEdit.text()), self.InActiveFigure.isChecked(), self.SelectedRange.isChecked(), self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))
        
        
    def mav_released(self):
        #if not self.InActiveFigure.isChecked():
         #   self.MAVEdit.setText(str(self.MAV_slider.value())) 
        fnum = self.Plotter.plot_eval(self.Plotter.mav, int(self.MAVEdit.text()), int(self.CurrentFigureEdit.text()), self.InActiveFigure.isChecked(), self.SelectedRange.isChecked(), self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))
        
    def fft(self):
        print 'fft'
        fnum = self.Plotter.plot_eval(self.Plotter.fft, 0, int(self.CurrentFigureEdit.text()), self.InActiveFigure.isChecked(), self.SelectedRange.isChecked(), self.SubtractMean_PB.isChecked())
        self.CurrentFigureEdit.setText(str(fnum))
    
    def export(self):
        self.Plotter.export(int(self.CurrentFigureEdit.text()))
        
        
    def clear(self):
        self.Sets_Dict = dict()
        self.update_SetScroll()
        self.Files_Dict = dict()
        self.update_FileScroll()
        self.Columns_Dict = dict()
        self.update_ColumnScroll()
        
    def lp_valuechanged(self):
        self.LPEdit.setText(str(self.LP_slider.value()))       
       
    def lp(self):
        print 'lp'
        self.MAVEdit.setText(str(self.LP_slider.value()))
        
    def hp_valuechanged(self):
        self.HPEdit.setText(str(self.HP_slider.value()))       
       
    def hp(self):
        print 'hp'
        self.MAVEdit.setText(str(self.HP_slider.value()))
        
    def mav_valuechanged(self):
        self.MAVEdit.setText(str(self.MAV_slider.value()))
        if self.InActiveFigure.isChecked():
            fnum = self.Plotter.plot_eval(self.Plotter.mav, int(self.MAVEdit.text()), int(self.CurrentFigureEdit.text()), self.InActiveFigure.isChecked(), self.SelectedRange.isChecked(), self.SubtractMean_PB.isChecked())
            self.CurrentFigureEdit.setText(str(fnum))

    def plotcolumn(self):
        for col in self.ColumnScroll.selectedItems():
            key = str(col.text()).split('::')
            col_data = self.Sets_Dict[key[0]][key[1]][key[2]]
            x_axis = self.Sets_Dict[key[0]][key[1]]['Zeit']
            label = str(col.text())
            self.Plotter.plot_column(x_axis, col_data, int(self.CurrentFigureEdit.text()), label)
        
#    def cut_zeros_filedict(self):
#        print 'cut_zeros_filedict'
#        if self.CutZeros.isChecked() == True:
#            print 'checked'
#            for fd in self.Files_Dict.keys():
#                self.Files_Dict[fd] = self.datreader.cutzeros_file_dict(self.Files_Dict[fd])
                
        
    def plotfile(self):
        for f in self.FileScroll.selectedItems():
            key = str(f.text()).split('::')
            print key
            title = str(f.text()) 
            self.Plotter.plot_file(self.Sets_Dict[key[0]][key[1]], [ str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')], int(self.CurrentFigureEdit.text()), title)
            

    def read_set(self):
        print 'read_set'
        filelist = list()
        filelist = [f for f in os.listdir(self.directory) if f.startswith(self.lineEdit.text())]
        print filelist
        filelist = [os.path.join(self.directory, f) for f in filelist]
        cols_of_interest = [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')]
        print cols_of_interest
        self.Sets_Dict[str(self.lineEdit.text())] = self.datreader.read_files(filelist, cols_of_interest)
        #self.cut_zeros_filedict()
        self.update_SetScroll()
        self.update_Files_Dict() 
        self.update_FileScroll()
        self.update_Columns_Dict()
        self.update_ColumnScroll()
        print self.Sets_Dict.keys()
        
    def update_ColumnScroll(self):
        print 'update_ColumnScroll'
        self.ColumnScroll.clear()
        for col in self.Columns_Dict.keys():
            item = QtGui.QListWidgetItem()
            item.setText(col)
            self.ColumnScroll.addItem(item)
                
            
    def update_Columns_Dict(self):
        print 'update_FilesDict'
        cols_of_interest = [str(c).rstrip(' ').lstrip(' ') for c in self.ColsOfInterestEdit.text().split(',')]
        for s in self.Sets_Dict.keys(): # sets
            for f in self.Sets_Dict[s].keys(): #files
                for c in self.Sets_Dict[s][f].keys():
                    if c in cols_of_interest:
                        self.Columns_Dict[s + '::' + f + '::' + c] = s + '::' + f + '::' + c
        
    def update_Files_Dict(self):
        print 'update_FilesDict'
        for s in self.Sets_Dict.keys(): # sets
            print s
            for f in self.Sets_Dict[s].keys(): #files
                print f
                self.Files_Dict[f] = str(s) + '::'+ str(f)
        #self.cut_zeros_filedict()
                
        
    def update_SetScroll(self):
        print 'update_SetScroll'
        self.SetScroll.clear()
        for key in self.Sets_Dict.keys():
            item = QtGui.QListWidgetItem()
            item.setText(str(key))
            self.SetScroll.addItem(item)
        
    def update_FileScroll(self):
        print 'update_FileScroll'
        self.FileScroll.clear()
        for key in self.Files_Dict.keys():
            item = QtGui.QListWidgetItem()
            item.setText(str(self.Files_Dict[key]))
            self.FileScroll.addItem(item)

    def show_file_start(self):
        try:
            f = open(self.lineEdit.text())
            s= ''
            for i in range(12):
                s = s+f.readline()
            self.textBrowser.setText(s)
        except Exception:
            print 'Error in file read'
Example No. 40
 def __init__(self, filename):
     self.read = DataReader(filename)
     self.mapInfo = self.ReadMap()
Example No. 41
def get_tweets_labels(tweet_file, labels_file):
    #Simply read in data
    data_reader = DataReader(tweet_file, labels_file)
    tweets = data_reader.read_tweets()
    labels = data_reader.read_labels()
    return tweets, labels
Example No. 42
def main():
    print("Running on BIO-NLP data\n\n")

    from sys import platform
    if platform == "win32":
        home_dir = "C:\\dl4nlp"
    else:
        home_dir = os.path.join(os.path.expanduser('~'), "dl4nlp")

    print("home_dir = {}".format(home_dir))

    # The hyper-parameters of the word embedding trained model
    window_size = 5
    embed_vector_size = 50
    min_count = 400

    data_folder = os.path.join("sample_data", "drugs_and_diseases")

    test_file_path = os.path.join(data_folder, "Drug_and_Disease_test.txt")

    resources_pickle_file = os.path.join(home_dir, "models", "resources.pkl")

    # The hyper-parameters of the LSTM trained model
    #network_type= 'unidirectional'
    network_type = 'bidirectional'
    #embed_vector_size = 50
    num_classes = 7 + 1
    max_seq_length = 613
    num_layers = 2
    num_hidden_units = 150
    num_epochs = 10
    batch_size = 50
    dropout = 0.2
    reg_alpha = 0.0

    print("Initializing data...")

    model_file_path = os.path.join(home_dir,'models','lstm_{}_model_units_{}_lyrs_{}_epchs_{}_vs_{}_ws_{}_mc_{}.h5'.\
                  format(network_type, num_hidden_units, num_layers,  num_epochs, embed_vector_size, window_size, min_count))

    K.clear_session()
    with K.get_session() as sess:
        K.set_session(sess)
        graphr = K.get_session().graph
        with graphr.as_default():

            # Evaluate the model
            print("Evaluating the model...")

            reader = DataReader(
                input_resources_pickle_file=resources_pickle_file)
            entityExtractor = EntityExtractor(reader)

            #load the model
            print("Loading the model from file {} ...".format(model_file_path))
            entityExtractor.load(model_file_path)
            entityExtractor.print_summary()

            if not os.path.exists(os.path.join(home_dir, "output")):
                os.makedirs(os.path.join(home_dir, "output"))

            # make sure that the input test data file is in IOB format
            output_prediction_file = os.path.join(home_dir, "output",
                                                  "prediction_output.tsv")

            evaluation_report, confusion_matrix = entityExtractor.evaluate_model(
                test_file_path, output_prediction_file)
            print(evaluation_report)
            print(confusion_matrix)

            #########################################################
            # from the command line interface,
            # (1) change directory to \code\02_modeling\03_model_evaluation
            # (2) run the following perl evaluation script
            # "C:\Program Files\Git\usr\bin\perl.exe" Drug_and_Disease_eval.pl ..\..\..\sample_data\drugs_and_diseases\Drug_and_Disease_test.txt C:\dl4nlp\output\prediction_output.tsv
            #########################################################
    K.clear_session()
    K.set_session(None)
    print("Done.")
def populate_features(tweet_file, labels_file):
    data_reader = DataReader(tweet_file, labels_file)
    return data_reader.get_features()
Example No. 44
from DataReader import DataReader
from Preprocessor import Preprocessor
from Vectorizer import Vectorizer
from Classifier import Classifier
from DeepLearning import DeepLearner
from sklearn.model_selection import train_test_split as split
import numpy as np

dr = DataReader('./datasets/training-v1/offenseval-training-v1.tsv', 'A')
data, labels = dr.get_labelled_data()
data, labels = dr.shuffle(data, labels, 'random')

data = data[:]
labels = labels[:]

prp = Preprocessor('remove_stopwords', 'lemmatize')
data = prp.clean(data)

tr_data, tst_data, tr_labels, tst_labels = split(np.array(data),
                                                 labels,
                                                 test_size=0.2,
                                                 stratify=labels)
tr_data, tr_labels = dr.upsample(tr_data, tr_labels, label=1)
tr_data, tr_labels = dr.shuffle(tr_data, tr_labels, 'random')

vct = Vectorizer('count')
vct.vectorize(tr_data)

model = DeepLearner(tr_data,
                    tr_labels,
                    vocab_length=vct.vocab_length,
Example No. 45
class DataDriver:
    def __init__(self, oscars):
        self.Data = DataReader("tmdb-movies.csv")
        self.Data.formatData()
        self.OscarFile = pd.read_csv(oscars)
        self.ActorsDictionary = {}
        self.MovieDF = self.Data.getMovieDF()
        self.Categories = [
            "ACTOR", "ACTRESS", "ACTOR IN A SUPPORTING ROLE",
            "ACTRESS IN A SUPPORTING ROLE", "ACTOR IN A LEADING ROLE",
            "ACTRESS IN A LEADING ROLE"
        ]
        self.OutputData = self.Data.getOutput()
        self.cleanOscarData()

    def scoreGenres(self):
        genreList = [
            'Action', 'Adventure', 'Science Fiction', 'Thriller', 'Fantasy',
            'Crime', 'Western', 'Drama', 'Family', 'Animation', 'Comedy',
            'Mystery', 'Romance', 'War', 'History', 'Music', 'Horror',
            'Documentary', 'Foreign', 'TV Movie'
        ]
        GenreScore = {
            k: v
            for (k,
                 v) in zip(genreList, list(reversed(range(len(genreList) +
                                                          1))))
        }
        for ind, row in self.MovieDF.iterrows():
            score = 1
            for genre in row["genres"]:
                score *= GenreScore[genre]
            self.Data.setNewAttribute(ind, "genres", score)

    def setActorsDict(self):
        for ind, row in self.MovieDF.iterrows():
            for actor in row["cast"]:
                self.ActorsDictionary[actor] = 0
        self.scoreOscars()

    def cleanOscarData(self):
        self.OscarFile.drop(["year"], axis=1, inplace=True)
        for ind, row in self.OscarFile.iterrows():
            if row["category"] not in self.Categories:
                self.OscarFile.drop([ind], inplace=True)
        self.setActorsDict()

    def scoreOscars(self):
        for ind, row in self.OscarFile.iterrows():
            if row["winner"]:
                if row["entity"] in self.ActorsDictionary.keys():
                    val = self.ActorsDictionary[row["entity"]]
                    self.ActorsDictionary[row["entity"]] = val + 1
            elif row["entity"] in self.ActorsDictionary.keys():
                val = self.ActorsDictionary[row["entity"]]
                self.ActorsDictionary[row["entity"]] = val
        self.AddScores()

    def SearchDict(self, dic, name):
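        # linear scan over the dict; equivalent to dic.get(name, 0)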
        for key, val in dic.items():
            if key == name:
                return val
        return 0

    def IterateScore(self, dic, arr):
        Score = 0
        for person in arr:
            Score += self.SearchDict(dic, person)
        return Score

    def AddScores(self):
        for ind, row in self.MovieDF.iterrows():
            self.MovieDF = self.Data.setNewAttribute(
                ind, "cast",
                self.IterateScore(self.ActorsDictionary, row["cast"]))

    def setRevOutput(self):
        df1 = pd.get_dummies(self.MovieDF["revenue"])
        self.MovieDF = pd.concat([self.MovieDF, df1], axis=1)
        self.MovieDF.drop(["revenue"], axis=1, inplace=True)

    def SaveData(self):
        self.MovieDF.to_csv("DataRevExploration.csv", index=False)
Example No. 46
import joblib
import numpy
from sklearn.preprocessing import MinMaxScaler
from DataReader import DataReader


if __name__ == '__main__':
    test_cases, test_result = DataReader.read_testdata()

    nn = joblib.load('./BPnn.pkl')

    # 2021.01.07: normalization and denormalization were folded into the BP model

    # Predict the test-set results with the trained model
    result_predict = nn.userPredict(test_cases)

    i = 0
    miss = 0
    for res in result_predict:
        print('No. ', i+1, res, test_result[i][0], '%.2f%%' %
              (abs(res[0] - test_result[i][0]) / test_result[i][0] * 100))
        if round(res[0]) != test_result[i][0]:
            miss += 1
        i = i + 1

    print('correct rate: ', (i-miss)/i*100, '%')