def read_data_sets(fake_data=False):
    global TRAIN_FILE_READER, VALID_FILE_READER, TEST_FILE_READER

    class DataSets(object):
        pass

    data_sets = DataSets()
    data_sets.train = []
    data_sets.validation = []
    data_sets.test = []
    data_sets.trainpart = []
    data_sets.testpart = []

    get_train_valid_test_sets(DATA_SWITCH)

    TRAIN_FILE_READER = fr.FileReader()
    TRAIN_FILE_READER.initialize(os.path.join(data_dir, TRAINSET),
                                 TRAINSET_MAXSIZE * NUM_SAMPLE_LENGTH)

    VALID_FILE_READER = fr.FileReader()
    VALID_FILE_READER.initialize(os.path.join(data_dir, VALIDSET),
                                 VALIDSET_MAXSIZE * NUM_SAMPLE_LENGTH)

    TEST_FILE_READER = fr.FileReader()
    TEST_FILE_READER.initialize(os.path.join(data_dir, TESTSET),
                                TESTSET_MAXSIZE * NUM_SAMPLE_LENGTH)

    change_train_data_set(data_sets)
    change_valid_data_set(data_sets)
    change_test_data_set(data_sets)

    return data_sets
Example #2
def reading_file(list_file):
    to_research = []
    for file in list_file:
        with file_reader.FileReader(file) as record_reader:
            for record in record_reader:
                to_research.append(record)
    return to_research
Example #3
def test_FileReader():
    reader = file_reader.FileReader(
        "/code/cohort3/src/python/comp-220/dummy-syntax.js")
    assert reader.line_count() == 14
    assert reader.word_count('else') == 3
    assert reader.char_count() == 307
    assert reader.summary(
        "else"
    ) == "File /code/cohort3/src/python/comp-220/dummy-syntax.js Summary: \n- 14 lines \n- 307 characters (incl. spaces) \n- 3 instances of the word 'else'"
    reader = file_reader.FileReader("/code/cohort3/src/javascript/syntax.js")
    assert reader.line_count() == 247
    assert reader.word_count('else') == 10
    assert reader.char_count() == 7626
    assert reader.summary(
        "else"
    ) == "File /code/cohort3/src/javascript/syntax.js Summary: \n- 247 lines \n- 7626 characters (incl. spaces) \n- 10 instances of the word 'else'"
Example #4
    def build_query_vec(self):

        query_fr = file_reader.FileReader(self.combine_file,
                                          words_filter=dataset.stop_words,
                                          vector_type=self.query_method)
        query_set = query_fr.build_set(self.combine_file)

        # the vector of query
        return query_set['doc' + str(self.combine_file_last_line)][0:-1]
Example #5
    def createGraph(self):
        fileReader = file_reader.FileReader()
        self.nodesDict = fileReader.readNodes()
        arcsList = fileReader.readArcs()
        for arc in arcsList:
            # node1Id, node2Id, distance = arc
            node1ID = arc["nodes"][0]
            node2ID = arc["nodes"][1]
            distance = arc["distance"]
            self.nodesDict[node1ID].addNeighbor(node2ID, distance)
            self.nodesDict[node2ID].addNeighbor(node1ID, distance)
Example #6
def test():
    MHz = 1e6
    fs = 4 * MHz
    block_size = 4 * int(fs / 1000)  # 4-millisecond blocks

    fr = file_reader.FileReader()
    ca = ca_search.CASearch(fs)

    for _ in range(10):
        bb_IQ = fr.read(block_size)
        ca.processBlock(bb_IQ)
Example #7
def read_data_sets(train_skip_samples=0,
                   valid_skip_samples=0,
                   test_skip_samples=0):
    global TRAIN_FILE_READER, VALID_FILE_READER, TEST_FILE_READER

    class DataSets(object):
        def __init__(self):
            self.train = []
            self.validation = []
            self.test = []

    data_sets = DataSets()
    get_train_valid_test_sets(DATA_SWITCH)

    TRAIN_FILE_READER = fr.FileReader()
    TRAIN_FILE_READER.initialize(os.path.join(DATA_PATH, TRAINSET),
                                 TRAINSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    TRAIN_FILE_READER.read_data_skip(train_skip_samples * NUM_SAMPLE_LENGTH,
                                     False, np.uint8)

    VALID_FILE_READER = fr.FileReader()
    VALID_FILE_READER.initialize(os.path.join(DATA_PATH, VALIDSET),
                                 VALIDSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    VALID_FILE_READER.read_data_skip(valid_skip_samples * NUM_SAMPLE_LENGTH,
                                     False, np.uint8)

    TEST_FILE_READER = fr.FileReader()
    TEST_FILE_READER.initialize(os.path.join(DATA_PATH, TESTSET),
                                TESTSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    TEST_FILE_READER.read_data_skip(test_skip_samples * NUM_SAMPLE_LENGTH,
                                    False, np.uint8)

    change_train_data_set(data_sets)
    change_valid_data_set(data_sets)
    change_test_data_set(data_sets)

    return data_sets
Example #8
def read_data_sets(fake_data=False, dtype=tf.float32):
    global train_file_reader, valid_file_reader, test_file_reader

    class DataSets(object):
        pass

    data_sets = DataSets()

    if fake_data:

        def fake():
            return DataSet([], [], fake_data=True, dtype=dtype)

        data_sets.train = fake()
        data_sets.validation = fake()
        data_sets.test = fake()
        return data_sets

    get_train_valid_test_sets(DATA_SWITCH)

    train_file_reader = fr.FileReader()
    valid_file_reader = fr.FileReader()
    test_file_reader = fr.FileReader()

    train_file_reader.initialize(os.path.join(DATA_PATH, TRAINSET),
                                 TRAINSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    valid_file_reader.initialize(os.path.join(DATA_PATH, VALIDSET),
                                 VALIDSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    test_file_reader.initialize(os.path.join(DATA_PATH, TESTSET),
                                TESTSET_MAXSIZE * NUM_SAMPLE_LENGTH)

    change_train_data_set(data_sets)
    change_valid_data_set(data_sets)
    change_test_data_set(data_sets)

    return data_sets
Example #9
    def read_header(self):

        present_time = file_reader.FileReader(self.file)
        present_time2 = present_time.read_file_time()[0]

        time_split = present_time2.split(':')

        inc_time = dateTime.dateTime(time_split[0], time_split[1])
        inc_time = inc_time.add_minutes(present_time2, 5)

        try:
            with open(self.file, 'r') as inFile:
                reader = inFile.readlines()[:6]
                lines = list(self.group(reader))
                final_list = ('Day: \n' + lines[0][1] + 'Time: \n' +
                              inc_time + '\n' + 'Company: \n' + 'NSHF')

        except IndexError:
            print("Error - Please specify an input file.")
            sys.exit(2)

        return final_list
Example #10
    def __init__(self, k, query, method):

        if method not in eval_types:
            raise ValueError("method is not supported")

        self.k = k
        self.query_method = eval_types[method]
        self.combine_file = self.build_combine_file(query)

        with open(self.combine_file) as f:
            self.combine_file_last_line = sum(1 for _ in f) - 1

        full_fr = file_reader.FileReader(self.combine_file,
                                         words_filter=dataset.stop_words,
                                         vector_type=TFIDF)

        combine_tfidf_set = full_fr.build_set(self.combine_file)

        del combine_tfidf_set['doc' + str(self.combine_file_last_line)]

        self.full_set = combine_tfidf_set

        self.query_vec = self.build_query_vec()
Example #11
    def __init__(self, parent=None, width=8, height=6, dpi=150):
        """
        Initialization
        """
        # Initialize figure and axis
        fig = Figure(figsize=(width, height), dpi=dpi)
        self.ax = fig.add_subplot(
            1, 1, 1, projection=cartopy.crs.PlateCarree())
        self.ax.stock_img()
        self.ax.add_feature(cartopy.feature.LAND, zorder=1)
        self.ax.add_feature(cartopy.feature.BORDERS, zorder=2)
        self.ax.add_feature(cartopy.feature.COASTLINE, zorder=2)
        fig.tight_layout()

        # Initialize FigureCanvas
        FigureCanvas.__init__(self, fig)
        self.setParent(parent)
        FigureCanvas.setSizePolicy(self,
                                   QtWidgets.QSizePolicy.Expanding,
                                   QtWidgets.QSizePolicy.Expanding)
        FigureCanvas.updateGeometry(self)

        # Initialize some variables we use
        self.countries = shpreader.Reader(shpreader.natural_earth(resolution='110m',
                                                                  category='cultural',
                                                                  name='admin_0_countries')).records()
        tmp, self.countries = itertools.tee(self.countries)
        self.land = cartopy.feature.ShapelyFeature(
            (c.geometry for c in tmp), cartopy.crs.PlateCarree(), facecolor=cartopy.feature.COLORS['land'])

        # Initialize filereader
        self.fr = fr.FileReader('data.txt')
        # Read file
        self.sel_countries = self.fr.read_countries()
        # Fill in those in file
        self.fill_country(self.find_country_a3(self.sel_countries), 1)
Example #12
    SAVER_BASENAME = "two-layer-rnn-model-anna-simplified"
    SAVE_FREQUENCY = 0.10
    STEP_SIZE = 100
    TEST_MODE = False
    TOP_N = 5
    USE_MY_LSTM_CELL = False
    VALIDATION_FREQUENCY = 0.10
elif CONFIG_NAME == "3_LAYER_MODE":
    # These settings max out at ~60% accuracy, too.
    BATCH_SIZE = 10
    BURN_IN_LETTERS = 128
    CHARS_TO_GENERATE = 2048
    CLIP_GRADIENT = 5
    KEEP_PROB = 0.50
    LEARNING_RATE = .01
    NUM_EPOCHS = 20
    NUM_LAYERS = 3
    NUM_LSTM_UNITS = 256
    RESTORE_FILENAME = "./two-layer-rnn-model-anna-simplified-19-0439.ckpt"
    SAVER_BASENAME = "three-layer-rnn-model-anna-simplified"
    SAVE_FREQUENCY = 0.10
    STEP_SIZE = 100
    TEST_MODE = False
    TOP_N = 5
    VALIDATION_FREQUENCY = 0.10
else:
    raise Exception("unknown configuration name")

import file_reader as fr
file_reader = fr.FileReader('./anna-simplified.txt')
Example #13
Saves the corresponding signal arrays for beaconed, non-beaconed and probe trials.
In our virtual reality task, every tenth trial is a probe trial, and every fifth trial that is not a probe trial
is a non-beaconed trial. The rest of the trials are beaconed. In these functions, the indices for the different
trial types are separated and saved into beaconed, nbeaconed and probe arrays (a minimal sketch of this rule
follows the imports below). If the arrays already exist, the data is loaded from the file (the location of this
file is specified in init_params in main).
'''

import numpy as np
import os
import matplotlib.pylab as plt
import vr_process_movement
import file_reader
import parameters
import signal_for_indices
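
# A minimal, hypothetical sketch (not part of the original module) of the
# trial-type rule described in the docstring above, assuming 1-based trial
# numbers: every tenth trial is a probe trial, every fifth non-probe trial is
# non-beaconed, and the remaining trials are beaconed.
def split_trial_types(trial_numbers):
    probe = [t for t in trial_numbers if t % 10 == 0]
    nbeaconed = [t for t in trial_numbers if t % 5 == 0 and t % 10 != 0]
    beaconed = [t for t in trial_numbers if t % 5 != 0]
    return beaconed, nbeaconed, probe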

fr = file_reader.FileReader()

beaconed = None
nbeaconed = None
probe = None
trial_num = None


def keep_first_from_close_series(array, threshold):
    num_delete = 1
    while num_delete > 0:
        diff = np.ediff1d(array, to_begin=threshold + 1)
        to_delete = np.where(diff <= threshold)
        num_delete = len(to_delete[0])

        if num_delete > 0:
            # assumed completion (the excerpt is truncated here): drop every
            # element that follows a neighbour within `threshold`, then re-check
            array = np.delete(array, to_delete)
    return array
Example #14
def get_one_data_file(input_file, output_file_root):

    read_index_list = [[0, get_file_size(input_file) // NUM_SAMPLE_LENGTH]]

    read_index_list_arr = np.array(read_index_list)
    num_samples_total = np.sum(read_index_list_arr[:, 1] -
                               read_index_list_arr[:, 0])

    print(num_samples_total)

    file_reader = fr.FileReader()
    file_reader.initialize(input_file, get_file_size(input_file))
    output_file_temp = output_file_root + '.dat_shuffled'
    f_out = open(output_file_temp, 'wb')

    num_samples_saved = 0
    num_samples_saved_valid = 0

    for i_qp in range(NUM_QPS):
        for i in range(len(read_index_list)):
            if i == 0:
                index_last = 0
            else:
                index_last = read_index_list[i - 1][1]

            index_start = read_index_list[i][0]
            index_end = read_index_list[i][1]
            assert index_end > index_start
            assert index_start >= index_last
            num_samples = index_end - index_start

            if index_start > index_last:
                file_reader.read_data(
                    (index_start - index_last) * NUM_SAMPLE_LENGTH,
                    isloop=False,
                    dtype=np.uint8)

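            # Samples are streamed through an overlapping window: after the first
            # batch of BATCH_SAMPLES, each pass drops the oldest BATCH_SAMPLES_SWAP
            # samples and reads BATCH_SAMPLES_SWAP new ones, so earlier reference
            # frames remain addressable in vectors_lstm / labels_qps.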
            index_start_in_batch = 0
            index_end_in_batch = 0
            vectors_lstm = []
            is_init = True
            while index_end_in_batch < num_samples:
                if is_init == True:
                    index_start_in_batch = 0
                    index_new_start_in_batch = 0
                    index_end_in_batch = BATCH_SAMPLES
                else:
                    index_start_in_batch += BATCH_SAMPLES_SWAP
                    index_new_start_in_batch = index_end_in_batch
                    index_end_in_batch += BATCH_SAMPLES_SWAP
                if index_end_in_batch > num_samples:
                    index_end_in_batch = num_samples

                num_samples_new = index_end_in_batch - index_new_start_in_batch

                data_new = file_reader.read_data(num_samples_new *
                                                 NUM_SAMPLE_LENGTH,
                                                 isloop=False,
                                                 dtype=np.uint8)
                data_new = data_new.reshape(num_samples_new, NUM_SAMPLE_LENGTH)

                data_info_new = np.copy(data_new[:, 0:64])
                data_new = np.copy(
                    data_new[:, 64 + NUM_SAMPLE_LENGTH_PER_QP * i_qp:64 +
                             NUM_SAMPLE_LENGTH_PER_QP * (i_qp + 1)])

                images_new = np.copy(data_new[:, 1 + NUM_LABEL_BYTES:])
                images_new = np.reshape(
                    images_new,
                    [num_samples_new, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS])

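                # vectors_lstm_new / labels_qps_new are padded with 255 across
                # LSTM_MAX_LENGTH slots; slot 0 holds the current sample and the
                # remaining slots are filled from reference frames further below.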
                vectors_lstm_new = 255 * np.ones(
                    (num_samples_new, VECTOR_LENGTH * LSTM_MAX_LENGTH)).astype(
                        np.float32)
                vectors_lstm_new[:, 0:VECTOR_LENGTH] = get_vectors(images_new)
                labels_qps_new = 255 * np.ones(
                    (num_samples_new,
                     (1 + NUM_LABEL_BYTES) * LSTM_MAX_LENGTH)).astype(np.uint8)
                labels_qps_new[:, 0:1 + NUM_LABEL_BYTES] = \
                    data_new[:, 0:1 + NUM_LABEL_BYTES]

                if is_init == True:
                    vectors_lstm = vectors_lstm_new
                    labels_qps = labels_qps_new
                else:
                    vectors_lstm = np.concatenate([
                        vectors_lstm[BATCH_SAMPLES_SWAP:vectors_lstm.shape[0]],
                        vectors_lstm_new
                    ],
                                                  axis=0)
                    labels_qps = np.concatenate([
                        labels_qps[BATCH_SAMPLES_SWAP:labels_qps.shape[0]],
                        labels_qps_new
                    ],
                                                axis=0)

                # widths, heights and the frame index are little-endian
                # multi-byte fields in the per-sample info bytes
                widths_new = data_info_new[:, 2] + 256 * data_info_new[:, 3]
                heights_new = data_info_new[:, 4] + 256 * data_info_new[:, 5]
                widths_in_64_new = widths_new // 64
                heights_in_64_new = heights_new // 64

                i_frames_new = (data_info_new[:, 10] + 256 * data_info_new[:, 11] +
                                256 * 256 * data_info_new[:, 12] +
                                256 * 256 * 256 * data_info_new[:, 13])

                num_samples_valid_currentbatch = 0
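                # A sample is written out only if its reference chain spans exactly
                # LSTM_MAX_LENGTH frames (the current frame plus len(delta_frames_new)
                # references) and get_is_print approves the frame.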
                for i_sample_new in range(num_samples_new):
                    delta_frames_new = get_delta_ref_frames(
                        i_frames_new[i_sample_new])
                    data_info_new[i_sample_new, 0] = len(delta_frames_new)

                    if len(delta_frames_new
                           ) + 1 == LSTM_MAX_LENGTH and get_is_print(
                               i_frames_new[i_sample_new]) == True:
                        num_samples_saved_valid += 1
                        num_samples_valid_currentbatch += 1

                        f_out.write(data_info_new[i_sample_new, :])
                        f_out.write(labels_qps_new[i_sample_new,
                                                   0:(1 +
                                                      NUM_LABEL_BYTES)].astype(
                                                          np.float32))
                        f_out.write(vectors_lstm_new[i_sample_new,
                                                     0:VECTOR_LENGTH].astype(
                                                         np.float32))

                        for i_delta_frame in range(len(delta_frames_new)):
                            i_sample_ref_new = i_sample_new - i_delta_frame * widths_in_64_new[
                                i_sample_new] * heights_in_64_new[i_sample_new]
                            if is_init == True:
                                i_sample_ref_total = i_sample_ref_new
                            else:
                                i_sample_ref_total = i_sample_ref_new + BATCH_SAMPLES - BATCH_SAMPLES_SWAP

                            if i_sample_ref_total >= 0:
                                vectors_lstm_new[
                                    i_sample_new, VECTOR_LENGTH *
                                    (i_delta_frame + 1):VECTOR_LENGTH *
                                    (i_delta_frame +
                                     2)] = vectors_lstm[i_sample_ref_total,
                                                        0:VECTOR_LENGTH]
                                labels_qps_new[
                                    i_sample_new, (1 + NUM_LABEL_BYTES) *
                                    (i_delta_frame + 1):(1 + NUM_LABEL_BYTES) *
                                    (i_delta_frame +
                                     2)] = labels_qps[i_sample_ref_total,
                                                      0:(1 + NUM_LABEL_BYTES)]

                            f_out.write(labels_qps[i_sample_ref_total,
                                                   0:(1 +
                                                      NUM_LABEL_BYTES)].astype(
                                                          np.float32))
                            f_out.write(vectors_lstm[i_sample_ref_total,
                                                     0:VECTOR_LENGTH].astype(
                                                         np.float32))

                is_init = False
                num_samples_saved += num_samples_new
                print('QP No.%d, %d (%d Valid) / %d Samples Completed' %
                      (i_qp + 1, num_samples_saved, num_samples_saved_valid,
                       num_samples_total * NUM_QPS))
    f_out.close()

    output_file = output_file_root + ('_%d.dat_lstm_%dqps' %
                                      (num_samples_saved_valid, NUM_QPS))
    os.rename(output_file_temp, output_file)
    shuffle_samples(output_file, NUM_SAMPLE_LENGTH_OUT * NUM_QPS)
Example #15
def calc_accuracy(test_set, classifier):  # signature inferred from the __main__ block below
    correct = 0.0
    total = len(test_set.keys())
    for key in test_set:
        real = test_set[key][-1]
        predicted = classifier.predict(test_set[key][0:-1])
        if real == predicted:
            correct += 1.0
    return correct / total


if __name__ == '__main__':
    print('Accuracy results:')
    file_name = "./dataset/amazon_cells_labelled_full.txt"
    train_file_name = "./dataset/amazon_cells_labelled_train.txt"
    test_file_name = "./dataset/amazon_cells_labelled_test.txt"
    data = file_reader.FileReader(file_name)
    # boolean
    train_set, _ = data.build_set("boolean", train_file_name)
    test_set, _ = data.build_set("boolean", test_file_name)
    classifier = rocchio_classifier.RocchioClassifier(train_set)
    print("Boolean:", '{:.3f}'.format(calc_accuracy(test_set, classifier)))
    # tf
    train_set, _ = data.build_set("tf", train_file_name)
    test_set, _ = data.build_set("tf", test_file_name)
    classifier = rocchio_classifier.RocchioClassifier(train_set)
    print("tf:", '{:.3f}'.format(calc_accuracy(test_set, classifier)))
    # tf-idf
    train_set, _ = data.build_set("tfidf", train_file_name)
    test_set, _ = data.build_set("tfidf", test_file_name)
    classifier = rocchio_classifier.RocchioClassifier(train_set)
    print("tfidf:", '{:.3f}'.format(calc_accuracy(test_set, classifier)))
Example #16
import file_reader
import echo

fr = file_reader.FileReader("testfile.xml")
fr.attach(echo.Echo())

fr.start()
fr.thread.join()

Example #17
    """
    correct = 0.0
    total = len(test_set.keys())
    for key in test_set:
        real = test_set[key][-1]
        predicted = classifier.predict(test_set[key][0:-1],
                                       similarity.CosineDistance)
        print(real, predicted)
        if real == predicted:
            correct += 1.0
    return correct / total


if __name__ == '__main__':
    file_reader = file_reader.FileReader(dataset.FULL_FILE,
                                         words_filter=dataset.stop_words,
                                         vector_type='tfidf')

    full_set = file_reader.build_set(dataset.FULL_FILE)
    train_set = file_reader.build_set(dataset.TRAIN_FILE)
    test_set = file_reader.build_set(dataset.TEST_FILE)

    classifier = rocchio_classifier.Rocchio_Classifier(train_set)
    print(calc_accuracy(test_set, classifier))
    # # svm_light_format(full_set)
    # k, queryID, query, rep_method = int(sys.argv[1]), sys.argv[2],str(sys.argv[3]), int(sys.argv[4])
    # eval = AdhocEval(k, query, rep_method)
    # output_file = "Output_"+str(queryID)+"_"+str(rep_method)+".txt"
    # output_dir = open(output_file, 'w')
    # sys.stdout = output_dir
    # eval.print_results()