Python Dataset Beispiele, data.data.Dataset Python Beispiele

Beispiel #1

0

Datei anzeigen

 def loadFile(self, sender):
     """Ask the user for a data file and load it into the window state.

     Opens a file dialog rooted at the current directory, loads the chosen
     file into a fresh ``Dataset``, wraps it in a ``DataMap`` and caches the
     map's ``o_vals`` on the window.

     Args:
         sender: Not used by this method.
     """
     gis_file = QtGui.QFileDialog()
     fname = gis_file.getOpenFileName(self, 'Open File', '.')
     if not fname:
         # An empty result means no file was chosen (e.g. the dialog was
         # cancelled): keep the current state instead of loading an empty
         # path and reporting success.
         return
     self.data = Dataset()
     self.data.load(fname)
     self.d_map = DataMap(self.data)
     self.file_label.setText("File Loaded!")
     self.o_vals = self.d_map.o_vals

Beispiel #2

0

Datei anzeigen

def main(clean_directory="resources/clean_data"):
    """Run the alpha-beta engine on every data file and write one CSV per file.

    Every regular file in ``clean_directory`` whose name contains ``"2"`` is
    loaded as a ``Dataset``. For each data point an ``Engine`` is created with
    a search depth of ``mate_in * 2`` and asked for its move sequence; the
    sequence length is converted to a mate distance and recorded together
    with the expected distance and the wall-clock search time.

    Results are written to ``resources/results/<name>_results.csv``.

    Args:
        clean_directory: Directory that holds the input data files.
    """
    # Local imports: only this function needs them.
    import csv
    import os

    initial_time = time.time()

    clean_files = [
        name for name in listdir(clean_directory)
        if isfile(join(clean_directory, name)) and "2" in name
    ]

    output_dir = "resources/results"

    for file in clean_files:
        print(f"processing {file} file")
        dataset = Dataset(f"{clean_directory}/{file}")

        data_points = dataset.get_data()
        total = len(data_points)

        results = []
        for count, data_point in enumerate(data_points, start=1):
            start = time.time()
            print(f"\tlooking at {count}:{total}")
            chess_engine = Engine(data_point.board,
                                  data_point.color,
                                  depth=data_point.mate_in * 2,
                                  algorithm="alpha-beta")
            result_moves = chess_engine.find_next_move()

            # The returned sequence is counted in half-moves (plies).
            moves_to_mate = len(result_moves) // 2 + 1

            duration = time.time() - start
            print(
                f"\t\tfound mate in {moves_to_mate} moves; expecting {data_point.mate_in} in {duration} seconds"
            )

            results.append(
                (data_point.board.fen(),
                 "->".join(str(move) for move in result_moves),
                 moves_to_mate, data_point.mate_in, duration))

        output_name = file.split("/")[-1].split(".")[0]
        output_path = f"{output_dir}/{output_name}_results.csv"

        print(f"downloading results to {output_path}")
        # Create the target directory on demand so a long engine run is not
        # lost to a missing folder when the file is opened.
        os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "w") as f:
            # "\n" terminator keeps the same line endings the hand-written
            # rows had; the writer adds quoting if a field contains a comma.
            writer = csv.writer(f, lineterminator="\n")
            writer.writerow([
                "fen", "sequence_of_moves", "found_mate", "best_mate",
                "difference", "compute_duration"
            ])
            for fen, sequence, found_mate, best_mate, elapsed in results:
                writer.writerow([
                    fen, sequence, found_mate, best_mate,
                    int(best_mate) - found_mate, elapsed
                ])

    print("_" * 80)
    print(f"total time: {(time.time() - initial_time)/60} minutes")

Beispiel #3

0

Datei anzeigen

def main(epochs, batch_size, layers, units, d_model, heads, dropout,
         dataset_dir, checkpoint, restore_checkpoint, epoch_to_save, verbose):
    """Build the training dataset and model, then train.

    When ``restore_checkpoint`` is truthy the model configuration (including
    ``mapping`` and ``max_len``) is read back through
    ``check_checkpoint_config`` and the model's checkpoint is restored.
    Otherwise the configuration is assembled from the arguments plus the
    ``mapping`` / ``max_len`` computed by the dataset, and written to
    ``<checkpoint>/config.json``.

    Args:
        epochs: Passed to ``Model.train``.
        batch_size: Batch size applied to the exported tf dataset.
        layers, units, d_model, heads, dropout: Model hyper-parameters stored
            in the config under ``num_layers``, ``units``, ``d_model``,
            ``num_heads`` and ``dropout``.
        dataset_dir: Location handed to ``Dataset``.
        checkpoint: Checkpoint directory; created if missing.
        restore_checkpoint: Whether to resume from an existing checkpoint.
        epoch_to_save: Passed to ``Model.train``.
        verbose: When non-zero the effective config is echoed.

    Raises:
        ValueError: If restoring was requested but the config file is missing.
    """
    os.makedirs(checkpoint, exist_ok=True)

    config_path = os.path.join(checkpoint, 'config.json')

    if restore_checkpoint:
        try:
            config = check_checkpoint_config(config_path)
        except FileNotFoundError as e:
            echo(e)
            # Chain the cause so the missing path stays in the traceback.
            raise ValueError('Checkpoint config not found.') from e

        train_dataset = Dataset(dataset_dir,
                                config={
                                    'mapping': config['mapping'],
                                    'max_len': config['max_len']
                                })
        train_dataset.create()
    else:
        config = {
            'num_layers': layers,
            'units': units,
            'd_model': d_model,
            'num_heads': heads,
            'dropout': dropout,
        }
        train_dataset = Dataset(dataset_dir)
        train_dataset.create()
        # The dataset computes these itself; store them with the config.
        config['mapping'] = train_dataset.mapping
        config['max_len'] = train_dataset.max_len

    buffer_size = len(train_dataset.data)

    if verbose != 0:
        echo(config)

    # Shuffle with a buffer as large as the dataset itself.
    dataset = train_dataset.export_as_tf_dataset()\
        .cache()\
        .shuffle(buffer_size)\
        .batch(batch_size)\
        .prefetch(tf.data.experimental.AUTOTUNE)

    print('Training Data Size:', buffer_size)

    model = Model(config, checkpoint)

    if restore_checkpoint:
        model.restore_checkpoint()
    else:
        with open(config_path, 'w') as f:
            f.write(json.dumps(config))

    model.train(dataset, epochs, epoch_to_save)

Beispiel #4

0

Datei anzeigen

Datei: test.py Projekt: LiuDongDaniel/test

def test_model(model_path, predict_csv_save_name, test_img_save_path):
    """Evaluate a saved ``Simple_CNN_30`` checkpoint on the test CSV.

    Loads the test set named by ``parse_args().test_csv_path``, restores the
    weights stored under the ``'net'`` key of the checkpoint, runs every
    sample through the model and hands the collected labels and scores to
    ``gene_roc_curve`` and ``gene_recall_and_precison``.

    Args:
        model_path: Path of the checkpoint given to ``torch.load``.
        predict_csv_save_name: CSV file to create. Only the header row is
            written; per-image rows are not emitted by this function.
        test_img_save_path: Not used by this function.
    """
    args = parse_args()
    # load test data
    dataset_test = Dataset(args.test_csv_path)
    test_loader = torch.utils.data.DataLoader(dataset_test)

    # load model
    model = Simple_CNN_30()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['net'])
    # Inference mode: switches layers such as dropout / batch-norm to their
    # evaluation behaviour.
    model.eval()

    # Write the CSV header and close the file deterministically.
    with open(predict_csv_save_name, 'w') as out:
        out.write("name,label,predict\n")

    # for roc
    pos_score_list = []
    actual_val_list = []
    predict_label_list = []

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for test_item in test_loader:
            inputs, actual_val, img_name, img = test_item
            predicted_val = model(inputs).data
            max_score, predict_label = torch.max(predicted_val, 1)

            # Score of class index 1 for the first sample of the batch.
            pos_score_list.append(predicted_val.numpy()[0][1])
            actual_val_list.append(actual_val.numpy()[0])
            predict_label_list.append(predict_label.numpy()[0])

    gene_roc_curve(actual_val_list, pos_score_list, 1)
    gene_recall_and_precison(actual_val_list, predict_label_list, 1)

Beispiel #5

0

Datei anzeigen

Datei: train_deepMF_with_text.py Projekt: cc233/recommendation-with-text

def train():
    """Train the model, logging timings, averaged losses and evaluation metrics.

    All hyper-parameters and paths come from the module-level ``FLAGS``. The
    function builds the ``Dataset``, creates the model inside a TensorFlow
    session and runs ``train_num * epochs // batch_size + 1`` training steps.

    Logging performed inside the loop:
      * step timings for the first five iterations;
      * every ``FLAGS.verbose`` iterations: the loss and the four auxiliary
        values returned by ``model.step``, each divided by ``FLAGS.verbose``;
      * every ``FLAGS.test_itr`` iterations and on the last iteration: the
        metrics returned by ``evaluate``.

    NOTE: the surrounding file is Python 2 (``xrange``). Every ``print`` here
    takes one parenthesised string, which prints identically on Python 2
    and 3.
    NOTE: no checkpoint is saved by this function.
    """

    def report_elapsed(since, prefix=''):
        # Print the seconds elapsed since `since`; return the new reference.
        now = time.time()
        print(prefix + 'time used:' + str(now - since) + 's')
        return now

    print('Learning rate:' + str(FLAGS.learning_rate))
    print('Num neg:' + str(FLAGS.num_neg))
    print('Latent dim:' + str(FLAGS.latent_dim))
    print('text dim:' + str(FLAGS.text_latent_dim))
    print('Batch size:' + str(FLAGS.batch_size))
    time_start = time.time()
    ckpt_path = FLAGS.train_dir
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)

    # load data
    dataset = Dataset(rating_matrix_path=FLAGS.rating_matrix_path,
                      num_negatives=FLAGS.num_neg,
                      batch_size=FLAGS.batch_size,
                      data_set=FLAGS.data_set,
                      text_path=FLAGS.text_path,
                      word2vec_model_path=FLAGS.word2vec_model_path)

    user_num, item_num = dataset.get_user_item_num()
    print("Load data done. #user=%d, #item=%d, " % (user_num, item_num))
    train_num = dataset.get_train_num()
    print('train_num=%d' % (train_num))
    (doc_index, doc_index_reverse, doc_mask, doc_mask_bool,
     word_vec) = dataset.get_doc()
    time_start = report_elapsed(time_start)
    gpu_options = tf.GPUOptions(allow_growth=True)

    # Labels for the four auxiliary values returned by model.step, in order.
    aux_labels = ('Average pos_embed:', ' Average doc:',
                  ' Average embed_without_lambda:',
                  ' Average doc_without_lambda:')

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        model = create_model(user_num, item_num, ckpt_path, FLAGS.optimizer,
                             sess, doc_index, doc_index_reverse, doc_mask,
                             doc_mask_bool, word_vec, FLAGS.num_neg)
        # Floor division: same result as Python 2 integer "/".
        iterations = train_num * FLAGS.epochs // FLAGS.batch_size + 1
        print('Iteration:' + str(iterations))

        # training
        loss_sum = 0
        aux_sums = [0, 0, 0, 0]
        itr_print = FLAGS.verbose
        itr_test = FLAGS.test_itr

        time_start = report_elapsed(time_start)
        for itr in xrange(iterations):
            itr_tag = 'itr:' + str(itr)
            print_due = itr % itr_print == 0 and itr != 0
            trace_timing = itr < 5  # detailed timings for the first steps

            if print_due:
                time_start = report_elapsed(time_start, itr_tag)
            if trace_timing:
                time_start = report_elapsed(time_start, itr_tag)
            users, pos_items, neg_item_set, neg_item_index = \
                dataset.next_batch()
            if trace_timing:
                time_start = report_elapsed(time_start, itr_tag)
            loss, test = model.step(sess, users, pos_items, neg_item_set,
                                    neg_item_index, True)
            loss_sum += loss
            for k in range(4):
                aux_sums[k] += test[k]
            if trace_timing:
                time_start = report_elapsed(time_start, itr_tag)

            if print_due:
                print('Epoch:' + str(dataset.get_epoch()) + ' Iteration:' +
                      str(itr) + ' Average Loss:' + str(loss_sum / itr_print))
                print(''.join(label + str(total / itr_print)
                              for label, total in zip(aux_labels, aux_sums)))
                loss_sum = 0
                aux_sums = [0, 0, 0, 0]
                sys.stdout.flush()

            # Final partial window: average over the steps since last reset.
            if itr == (iterations - 1) and itr % itr_print != 0:
                print('Epoch:' + str(dataset.get_epoch()) + ' Iteration:' +
                      str(itr) + ' Average Loss:' +
                      str(loss_sum / (itr % itr_print)))
                loss_sum = 0
                sys.stdout.flush()

            # test
            if itr % itr_test == 0 or itr == iterations - 1:
                (auc, precision_50, precision_100, precision_150,
                 precision_200, precision_250, precision_300, mean_ap, ndcg,
                 recall_50, recall_100, recall_150, recall_200, recall_250,
                 recall_300) = evaluate(model, sess, dataset)

                # ndcg is returned by evaluate but not reported.
                metrics = (
                    (' MAP:', mean_ap),
                    (' Precision 50:', precision_50),
                    (' Precision 100:', precision_100),
                    (' Precision 150:', precision_150),
                    (' Precision 200:', precision_200),
                    (' Precision 250:', precision_250),
                    (' Precision 300:', precision_300),
                    (' AUC:', auc),
                    (' Recall 50:', recall_50),
                    (' Recall 100:', recall_100),
                    (' Recall 150:', recall_150),
                    (' Recall 200:', recall_200),
                    (' Recall 250:', recall_250),
                    (' Recall 300:', recall_300),
                )
                for label, value in metrics:
                    print('Epoch:' + str(dataset.get_epoch()) +
                          ' Iteration:' + str(itr) + label + str(value))
                sys.stdout.flush()

Beispiel #6

0

Datei anzeigen

        predicate_out=Dense(128)(predicate_drop)
        
        

        sim=Lambda(lambda x:base_dssm.cosine(x[0],x[1]),output_shape=lambda x:(None,1))([question_out,predicate_out])
        sim_model=Model([input_1,input_2],sim)
        model_1=Model(input_1,question_out)
        model_1.compile(optimizer='adam',loss='mse')
        model_2=Model(input_2,predicate_out)
        model_2.compile(optimizer='adam',loss='mse')
        self.model_1=model_1
        self.model_2=model_2
        self.sim_model=sim_model
        self.build()

    

if __name__ == "__main__":
    # Script entry point: train a BiLSTMCNNSM model on the predicate training
    # data, save its weights, then encode the data to file via base_dssm.
    ds = Dataset(
        config.predicate_train_data,
        vocab=vocab,
        label_column=2,
        process=False,
    )
    model = BiLSTMCNNSM()
    # model.load_weights()
    model.train(ds, iter_num=100, nb_epoch=1)
    model.save_weights()
    base_dssm.encodeData2file(model)
    '''
    ds=Dataset(data_path=config.seg_train_triples+".chars",vocab=vocab,label_column=None,process=False)
    questions=ds.get_column_data(0,ispadding=True,max_len=50)
    predicates=ds.get_column_data(2,ispadding=True,max_len=20)
    encoded_predicates=model.encode_predicate(predicates)
    encoded_questions=model.encode_question(questions)
    '''

Beispiel #7

0

Datei anzeigen

                     output_shape=lambda x:
                     (None, 1))([question_out, predicate_out])
        sim_model = Model([question_in, predicate_in], sim)
        model_1 = Model(question_in, question_out)
        model_1.compile(optimizer='adam', loss='mse')
        model_2 = Model(predicate_in, predicate_out)
        model_2.compile(optimizer='adam', loss='mse')
        self.model_1 = model_1
        self.model_2 = model_2
        self.sim_model = sim_model
        self.build()


if __name__ == "__main__":
    # Script entry point: train a CDSSM model on the predicate training data,
    # save its weights and encode the data to file via base_dssm.
    ds = Dataset(config.predicate_train_data, vocab=vocab, label_column=2,
                 process=False)
    model = CDSSM()
    # model.load_weights()

    model.train(ds, iter_num=20, nb_epoch=1)
    model.save_weights()

    base_dssm.encodeData2file(model)

    # Reload the character-level training triples (no label column) and pull
    # out the padded question (column 0) and predicate (column 2) data.
    ds = Dataset(data_path=config.seg_train_triples + ".chars", vocab=vocab,
                 label_column=None, process=True)
    questions = ds.get_column_data(0, ispadding=True, max_len=50)
    predicates = ds.get_column_data(2, ispadding=True, max_len=20)

Beispiel #8

0

Datei anzeigen

class Window(QtGui.QWidget):
    def __init__(self):
        """Initialise empty data/interpolation state and build the widgets."""
        super(Window, self).__init__()
        # Nothing is loaded or queried until the user acts.
        self.data = None
        self.d_map = None
        self.record = None
        # Original and interpolated values start as separate empty lists.
        self.o_vals, self.i_vals = [], []
        self.initUI()

    def initUI(self):
        """Create and position all widgets of the window, then show it.

        The window has a fixed size of 700x500. Buttons are wired to the
        matching methods (``calculate_idw``, ``generateLoocv``,
        ``generateError``, ``loadFile``, ``interpolate``); the quit button
        is wired to the application's ``quit``. Status labels next to the
        buttons start empty and are filled in by those methods.
        """
        self.setFixedHeight(500)
        self.setFixedWidth(700)

        # IDW calculation button and its status label
        IDW_btn = QtGui.QPushButton('Calculate IDW', self)
        IDW_btn.clicked.connect(self.calculate_idw)
        IDW_btn.resize(IDW_btn.sizeHint())
        IDW_btn.move(10, 200)
        self.idw_label = QtGui.QLabel(self)
        self.idw_label.move(170, 200)
        self.idw_label.setText("")

        # Generate LOOCV button and its status label
        loocv_btn = QtGui.QPushButton('Generate Loocv File', self)
        loocv_btn.clicked.connect(self.generateLoocv)
        loocv_btn.resize(loocv_btn.sizeHint())
        loocv_btn.move(10, 300)
        self.loocv_txt = QtGui.QLabel(self)
        self.loocv_txt.setText("")
        self.loocv_txt.move(70, 300)

        # Generate error summary button and its status label
        error_btn = QtGui.QPushButton('Generate Error Summary', self)
        error_btn.clicked.connect(self.generateError)
        error_btn.resize(error_btn.sizeHint())
        error_btn.move(10, 340)
        self.error_label = QtGui.QLabel(self)
        self.error_label.setText("")
        self.error_label.move(70, 340)

        # Quit button
        quit_btn = QtGui.QPushButton('Quit', self)
        quit_btn.clicked.connect(QtCore.QCoreApplication.instance().quit)
        quit_btn.resize(quit_btn.sizeHint())
        quit_btn.move(420, 450)

        # Load file button
        load_btn = QtGui.QPushButton('Load File', self)
        load_btn.resize(load_btn.sizeHint())
        load_btn.move(10, 450)
        load_btn.clicked.connect(self.loadFile)

        # File status label
        self.file_label = QtGui.QLabel(self)
        self.file_label.setText("No File Loaded")
        self.file_label.move(120, 455)

        # Query button
        query_btn = QtGui.QPushButton('Interpolate Value ->', self)
        query_btn.resize(query_btn.sizeHint())
        query_btn.move(10, 50)
        query_btn.clicked.connect(self.interpolate)
        self.setGeometry(200, 200, 250, 150)

        # Interpolated value label
        self.i_label = QtGui.QLabel(self)
        self.i_label.setText("Interpolated Value: ")
        # Position the label itself; the original moved the whole window
        # here (self.move), leaving the label at its default position.
        self.i_label.move(10, 150)

        # Query input fields, each preceded by a caption label
        self.query_x = QtGui.QLineEdit(self)
        self.query_x.setFixedWidth(50)
        self.query_x.move(260, 50)
        x_label = QtGui.QLabel(self)
        x_label.setText("X Value: ")
        x_label.move(200, 50)
        self.query_y = QtGui.QLineEdit(self)
        self.query_y.setFixedWidth(50)
        self.query_y.move(380, 50)
        y_label = QtGui.QLabel(self)
        y_label.setText("Y Value: ")
        y_label.move(320, 50)
        self.query_t = QtGui.QLineEdit(self)
        self.query_t.setFixedWidth(50)
        self.query_t.move(500, 50)
        t_label = QtGui.QLabel(self)
        t_label.setText("T Value: ")
        t_label.move(440, 50)
        self.query_p = QtGui.QLineEdit(self)
        self.query_p.setFixedWidth(50)
        self.query_p.move(630, 50)
        p_label = QtGui.QLabel(self)
        p_label.setText("EXP Value: ")
        p_label.move(560, 50)

        # Show the window finally
        self.setWindowTitle('GIS')
        self.show()
#Task 1: Import

    def loadFile(self, sender):
        """Ask the user for a data file and load it into the window state.

        Opens a file dialog rooted at the current directory, loads the
        chosen file into a fresh ``Dataset``, wraps it in a ``DataMap`` and
        caches the map's ``o_vals`` on the window.

        Args:
            sender: Not used by this method.
        """
        gis_file = QtGui.QFileDialog()
        fname = gis_file.getOpenFileName(self, 'Open File', '.')
        if not fname:
            # An empty result means no file was chosen (e.g. the dialog was
            # cancelled): keep the current state instead of loading an empty
            # path and reporting success.
            return
        self.data = Dataset()
        self.data.load(fname)
        self.d_map = DataMap(self.data)
        self.file_label.setText("File Loaded!")
        self.o_vals = self.d_map.o_vals

    def showInputDialog(self):
        """Prompt for an ``"x y t"`` triple and run an IDW query on it.

        Does nothing when the dialog is not confirmed. The entered text is
        split on single spaces and must yield exactly three parts, which are
        passed to ``Record`` as strings.
        """
        entered, confirmed = QtGui.QInputDialog.getText(
            self, 'Input Dialog', 'Enter your data via "x y t":')
        if not confirmed:
            return

        x, y, t = str(entered).split(" ")
        self.record = Record(x, y, t, self.d_map)
        self.IDW_Query(self.record)
#Task 4: Interpolate the given value.

    def interpolate(self):
        """Interpolate a value for the point entered in the query fields.

        Asks for the number of neighbours (3 when the dialog is not
        confirmed), reads x, y, t and the exponent from the line edits,
        fetches a record from the data map via ``get_record`` and displays
        the result of ``Record.interpolate_value`` in ``i_label``.
        """
        answer, accepted = QtGui.QInputDialog.getText(
            self, 'Input Dialog', 'How Many Neighbors do you want to use?:')
        neighbor_count = int(answer) if accepted else 3

        qx = float(self.query_x.text())
        qy = float(self.query_y.text())
        qt = float(self.query_t.text())
        power = float(self.query_p.text())

        found = self.d_map.get_record(qx, qy, qt)
        value = Record.interpolate_value(found.x, found.y, found.t, found.m,
                                         self.d_map, neighbor_count, power)
        self.i_label.setText("Interpolated Value: %f" % value)

#Task 5: Generate Loocv file for powers 1,2,3 and Neighbors 3,4,5. (measurement,p1n1,p1n2,p1n3,...)

    def generateLoocv(self):
        """Generate the LOOCV file from the data map and report completion."""
        Record.generateloocv(self.d_map)
        done_message = "Loocv File Generated to /output/loocv_idw.txt"
        self.loocv_txt.setText(done_message)

#Task 5: Generate the Error Estimations for all 9 Interpolations based on the original values (original, I1, I2, I3, I4,...I8)

    def generateError(self):
        """Parse the LOOCV output and generate the error summary from it."""
        parsed_loocv = Record.parseLoocv("../output/loocv_idw")
        Record.generateError(parsed_loocv)
        done_message = "Error Summary Generated to /output/error_statistics_idw.txt"
        self.error_label.setText(done_message)

    def IDW_Query(self, record, exp=1, n=1):
        '''
        Interpolate a value for ``record`` and write it to the query field.

        The record carries the data entered in the GUI; its x, y and t are
        handed to ``Record.interpolate_value`` together with the data map,
        the neighbour count ``n`` and the exponent ``exp``.
        '''
        # NOTE(review): the other call sites in this class pass a fourth
        # positional value (record.m) before the data map - confirm which
        # signature interpolate_value expects.
        interpolated = Record.interpolate_value(record.x, record.y, record.t,
                                                self.d_map, n, exp)
        # NOTE(review): query_field is not created in initUI - verify where
        # this attribute is supposed to come from.
        self.query_field.setText(str(interpolated))


#Task 3: Interpolate using IDW

    def calculate_idw(self):
        """Interpolate every record of the data map and store the results.

        Asks for the neighbour count (default 3) and the exponent
        (default 1); either default applies when its dialog is not
        confirmed. The interpolated values replace the contents of
        ``self.i_vals`` so they can be used for the error measurements.
        """
        n, ok = QtGui.QInputDialog.getText(self, 'Input Neighbors',
                                           'How Many Neighbors?:')
        neighbors = int(n) if ok else 3

        exp, ok = QtGui.QInputDialog.getText(self, 'Input Exp',
                                             'What Exponent?:')
        p = int(exp) if ok else 1

        # Replace the previous results in place: appending would mix values
        # computed with different neighbour/exponent settings across runs.
        self.i_vals[:] = [
            Record.interpolate_value(record.x, record.y, record.t, record.m,
                                     self.d_map, neighbors, p)
            for record in self.d_map.records
        ]
        self.idw_label.setText("IDW Calculated!")