Python read_data 예제들, src.preprocessing.read_data Python 예제들

예제 #1

0

파일 보기

파일: test_aouda.py 프로젝트: casyazmon/mars_city

 def setUpClass(cls):
     """Read 'small_dataset.dat' once and cache the extracted frame on the class."""
     super(AoudaTests, cls).setUpClass()
     cls.resolution = 1000
     # The data file sits in the same directory as this test module.
     here = os.path.dirname(__file__)
     cls.filename = os.path.join(here, 'small_dataset.dat')
     # The base datetime handed to read_data is 15 seconds before "now".
     cls.base_datetime = datetime.now() - timedelta(seconds=15)
     raw = read_data(cls.filename, cls.base_datetime)
     cls.data = extract_hr_acc(raw)

예제 #2

0

파일 보기

파일: test_aouda.py 프로젝트: wasiqmukhtar/mars_city

 def setUpClass(cls):
     """Prepare class-level fixtures: file name, resolution, base time, data."""
     super(AoudaTests, cls).setUpClass()
     module_dir = os.path.dirname(__file__)
     dataset_path = os.path.join(module_dir, 'small_dataset.dat')
     cls.filename = dataset_path
     cls.resolution = 1000
     # read_data receives a base datetime lying 15 seconds in the past.
     fifteen_seconds = timedelta(seconds=15)
     cls.base_datetime = datetime.now() - fifteen_seconds
     cls.data = extract_hr_acc(
         read_data(cls.filename, cls.base_datetime))

예제 #3

0

파일 보기

파일: test_preprocessing.py 프로젝트: lucianamaroun/probabilistic-ranking

 def test_read_data_unlabeled(self):
   """ Tests the function read_data for unlabeled data.

   read_data is called without the `labeled` flag; every expected
   Reference is built with None as its last argument. The expected value
   is a list of groups, each group being a list of Reference objects.
   """
   references = pre.read_data(self.testfilename)
   sigucc = 'sigucc special interest group univers comput servic'
   truth = [
     [
       Reference(0, 'm jones',
           'symbol intersect detect method improv spatial intersect join',
           ['e rundensteiner', 'y huang'], 'geoinformatica', None),
       Reference(1, 'matthew c jones',
           'improv spatial intersect join symbol intersect detect',
           ['e rundensteiner', 'h kuno', 'p marron', 'v taube', 'y ra'],
           'sigmodels.intern manag data', None),
       Reference(2, 'matthew c jones',
           'view materi techniqu complex hirarch object',
           ['e rundensteiner', 'y huang'],
           'ssd symposium larg spatial databas', None),
     ],
     [
       Reference(3, 'mike w miller', 'domin draw bipartit graph',
           ['l berg'], sigucc, None),
       Reference(4, 'mike w miller', 'rel compromis statist databas',
           [], sigucc, None),
     ],
     [
       Reference(5, 'c chen', 'formal approach scenario analysi',
           ['d kung', 'j samuel', 'j gao', 'p hsia', 'y toyoshima'],
           'ieee softwar', None),
     ],
     [
       Reference(6, 'jane j robinson', 'discours code clue context', [],
           'acl meet the associ comput linguist', None),
       Reference(7, 'jane j robinson', 'diagram grammar dialogu', [],
           'cooper interfac inform system', None),
     ],
     [
       Reference(8, 'a gupta', 'iri h java distanc educ',
           ['a gonzalez', 'a hamid', 'c overstreet', 'h wahab', 'j wild',
            'k maly', 's ghanem', 'x zhu'],
           'acm journal educ resourc comput', None),
     ],
     [
       # The two title fragments are joined by implicit concatenation, so
       # the first one must end with a space ("regist file", not
       # "registfile").
       Reference(9, 'mary d brown',
           'intern redund represent limit bypass support pipelin adder regist '
           'file', ['y patt'],
           'proceed the th ieee intern symposium high '
           'perform comput architectur hpca intern symposium high perform '
           'comput architectur talk slide', None),
     ],
   ]
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(references, truth)

예제 #4

0

파일 보기

파일: assumption_free_data_tests.py 프로젝트: wasiqmukhtar/mars_city

 def setUpClass(cls):
     """
     Loads 'dataset.dat' (located next to this module) and stores the
     result of extract_hr_acc on the class as ``_df``.
     """
     dataset_path = os.path.join(os.path.dirname(__file__), 'dataset.dat')
     raw = read_data(dataset_path)
     cls._df = extract_hr_acc(raw)

예제 #5

0

파일 보기

 def test_read_data_labeled(self):
     """ Tests the function read_data for labeled data.

     read_data is called with ``labeled=True``; every expected Reference is
     built with a string as its last argument, identical for all references
     of one group. The expected value is a list of groups, each group being
     a list of Reference objects.
     """
     references = pre.read_data(self.testfilename, labeled=True)
     sigucc = 'sigucc special interest group univers comput servic'
     truth = [
         [
             Reference(
                 0, 'm jones',
                 'symbol intersect detect method improv spatial intersect join',
                 ['e rundensteiner', 'y huang'], 'geoinformatica', '81'),
             Reference(
                 1, 'matthew c jones',
                 'improv spatial intersect join symbol intersect detect',
                 ['e rundensteiner', 'h kuno', 'p marron', 'v taube', 'y ra'],
                 'sigmodels.intern manag data', '81'),
             Reference(
                 2, 'matthew c jones',
                 'view materi techniqu complex hirarch object',
                 ['e rundensteiner', 'y huang'],
                 'ssd symposium larg spatial databas', '81'),
         ],
         [
             Reference(3, 'mike w miller', 'domin draw bipartit graph',
                       ['l berg'], sigucc, '185'),
             Reference(4, 'mike w miller', 'rel compromis statist databas',
                       [], sigucc, '185'),
         ],
         [
             Reference(
                 5, 'c chen', 'formal approach scenario analysi',
                 ['d kung', 'j samuel', 'j gao', 'p hsia', 'y toyoshima'],
                 'ieee softwar', '94'),
         ],
         [
             Reference(6, 'jane j robinson', 'discours code clue context',
                       [], 'acl meet the associ comput linguist', '69'),
             Reference(7, 'jane j robinson', 'diagram grammar dialogu', [],
                       'cooper interfac inform system', '69'),
         ],
         [
             Reference(
                 8, 'a gupta', 'iri h java distanc educ',
                 ['a gonzalez', 'a hamid', 'c overstreet', 'h wahab',
                  'j wild', 'k maly', 's ghanem', 'x zhu'],
                 'acm journal educ resourc comput', '0'),
         ],
         [
             Reference(
                 9, 'mary d brown',
                 'intern redund represent limit bypass support pipelin adder regist '
                 'file', ['y patt'],
                 'proceed the th ieee intern symposium high '
                 'perform comput architectur hpca intern symposium high perform comput'
                 ' architectur talk slide', '43'),
         ],
     ]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(references, truth)

예제 #6

0

파일 보기

파일: test_preprocessing.py 프로젝트: casyazmon/mars_city

    def test_extract_hr_acc(self):
        """Check the columns, index and values of the frame built by
        extract_hr_acc against the frame returned by read_data."""
        data = read_data(path=PreprocessingTests.filename,
                         base_datetime=PreprocessingTests.base_datetime)
        frame = extract_hr_acc(data)

        # Every derived column must be present in the extracted frame.
        columns = set(frame.columns.tolist())
        for expected_column in ('hr', 'acc', 'acc_x', 'acc_y', 'acc_z',
                                'ratio', 'ratio_log'):
            self.assertIn(expected_column, columns)

        self.assertItemsEqual(data.index.tolist(), frame.index.tolist())
        # NOTE(review): notnull(frame) presumably has as many rows as frame
        # itself, which would make this check always pass -- confirm intent.
        self.assertEqual(len(frame), len(notnull(frame)))

        # Recompute the ratio columns on the source data, then fill gaps
        # forwards and backwards before comparing values.
        data['eratio'] = (data.hr / data.IMU_Chest_Magnitude)
        data['eratio_log'] = log(data['eratio'])
        data = data.fillna(method='ffill').fillna(method='bfill')

        # (column of the source data, matching column of the extracted frame)
        column_pairs = (
            ('hr', 'hr'),
            ('IMU_Chest_Magnitude', 'acc'),
            ('IMU_Chest_x', 'acc_x'),
            ('IMU_Chest_y', 'acc_y'),
            ('IMU_Chest_z', 'acc_z'),
            ('eratio', 'ratio'),
            ('eratio_log', 'ratio_log'),
        )
        for source_name, frame_name in column_pairs:
            self.assertItemsEqual(getattr(data, source_name).tolist(),
                                  getattr(frame, frame_name).tolist())

예제 #7

0

파일 보기

 def test_tfidf(self):
   """ Tests the tfidf function.

   The corpus read from the test file is extended with the extra entry
   'matt'. 'matt' occurs twice in the word list under test, and the
   expected score is twice its idf value.
   """
   references = pre.read_data(self.testfilename)
   corpus = pre.get_corpus(references) + ['matt']
   idf = mod.get_idf(corpus)
   words = mod.get_words('matt matt huang kuno jones c marron brown')
   tfidf = mod.tfidf('matt', words, idf)
   truth = idf['matt'] * 2
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(tfidf, truth)

예제 #8

0

파일 보기

파일: test_preprocessing.py 프로젝트: casyazmon/mars_city

    def test_read_data_no_base_datetime(self):
        """Call read_data with only a path and check the resulting frame's
        columns and first index entry."""
        frame = read_data(path=PreprocessingTests.filename)

        available = set(frame.columns.tolist())
        required = ('activityID', 'hr', 'IMU_Chest_Magnitude',
                    'IMU_Chest_x', 'IMU_Chest_y', 'IMU_Chest_z')
        for column_name in required:
            self.assertIn(column_name, available)

        # The first index entry has to be later than the current time.
        first_stamp = frame.index[0]
        self.assertLess(datetime.now(), first_stamp)

예제 #9

0

파일 보기

파일: test_preprocessing.py 프로젝트: lucianamaroun/probabilistic-ranking

 def test_get_corpus(self):
   """ Tests the function get_corpus.

   The expected corpus is a flat list of names, duplicates included, in
   the order spelled out below.
   """
   references = pre.read_data(self.testfilename)
   corpus = pre.get_corpus(references)
   truth = [
     'm jones', 'e rundensteiner', 'y huang',
     'matthew c jones', 'e rundensteiner', 'h kuno', 'p marron', 'v taube',
     'y ra',
     'matthew c jones', 'e rundensteiner', 'y huang',
     'mike w miller', 'l berg',
     'mike w miller',
     'c chen', 'd kung', 'j samuel', 'j gao', 'p hsia', 'y toyoshima',
     'jane j robinson',
     'jane j robinson',
     'a gupta', 'a gonzalez', 'a hamid', 'c overstreet', 'h wahab', 'j wild',
     'k maly', 's ghanem', 'x zhu',
     'mary d brown', 'y patt',
   ]
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(corpus, truth)

예제 #10

0

파일 보기

 def test_norm_tfidf(self):
   """ Tests the normalized tfidf.

   The expected value is the plain tfidf of 'matt' divided by the square
   root of the sum of squared tfidf scores of every word in the list.
   """
   references = pre.read_data(self.testfilename)
   corpus = pre.get_corpus(references) + ['matt']
   idf = mod.get_idf(corpus)
   words = mod.get_words('matt matt huang kuno jones c marron brown')
   n_tfidf = mod.norm_tfidf('matt', words, idf)
   raw_score = mod.tfidf('matt', words, idf)
   # sum() adds the squares in list order starting from 0, exactly like
   # the explicit accumulation loop it replaces.
   denom = math.sqrt(sum(mod.tfidf(word, words, idf) ** 2 for word in words))
   truth = raw_score / denom
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(n_tfidf, truth)

예제 #11

0

파일 보기

 def test_get_corpus(self):
     """ Tests the function get_corpus.

     The expected corpus is a flat list of names, duplicates included, in
     the order spelled out below.
     """
     references = pre.read_data(self.testfilename)
     corpus = pre.get_corpus(references)
     truth = [
         'm jones', 'e rundensteiner', 'y huang',
         'matthew c jones', 'e rundensteiner', 'h kuno', 'p marron',
         'v taube', 'y ra',
         'matthew c jones', 'e rundensteiner', 'y huang',
         'mike w miller', 'l berg',
         'mike w miller',
         'c chen', 'd kung', 'j samuel', 'j gao', 'p hsia', 'y toyoshima',
         'jane j robinson',
         'jane j robinson',
         'a gupta', 'a gonzalez', 'a hamid', 'c overstreet', 'h wahab',
         'j wild', 'k maly', 's ghanem', 'x zhu',
         'mary d brown', 'y patt',
     ]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(corpus, truth)

예제 #12

0

파일 보기

파일: train.py 프로젝트: sycomix/QuantumLab

def train(n, path_to_csv):
    '''
    Train an InOutNN model on the data in a CSV file and pickle it to disk.

    Input
    -----
    n  - polynomial degree
    path_to_csv - training set path

    Output
    ------
    The value returned by model.polynomial_coefficients(means, stds).

    Side effects
    ------------
    Prints the coefficients flipped along axis 0 and writes the pickled
    model to ../MODEL/inoutnn_<n>.pcl relative to this file's directory.
    '''
    # prepare data
    #-------------------------------------------------
    # load data
    raw_data = pre.read_data(path_to_csv)
    # rescale data
    feature_data, means, stds = pre.feature_scaling(raw_data, n)
    # split to train and test sets
    data = pre.split_data(feature_data)
    # Each set is sliced into "all columns but the last" and "last column";
    # presumably features and target -- confirm against pre.split_data.
    train_x, train_y = data['train'][:, :-1], data['train'][:, -1]
    test_x, test_y = data['test'][:, :-1], data['test'][:, -1]

    # create instance of the neural network
    #-------------------------------------------------
    model = ionn.InOutNN(n, init_random=True)

    # train the model
    #-------------------------------------------------
    model.train(train_x,
                train_y,
                alpha=0.5,
                itmax=20000,
                verbose=False)
    # compute polynomial coefficients (from rescaled features)
    poly_coeffs = model.polynomial_coefficients(means, stds)

    print(np.flip(poly_coeffs, axis=0))

    # test the model
    #-------------------------------------------------
    # The returned errors are not used in this function; the call is kept
    # because model.test may have effects of its own (not visible here).
    model.test(train_x, train_y, test_x, test_y)

    # dump the model - Python object serialisation
    #-------------------------------------------------
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # os.path.join instead of a hand-written "/" keeps the path portable.
    model_file = os.path.join(dir_path, os.pardir, "MODEL",
                              "inoutnn_{}.pcl".format(n))
    with open(model_file, 'wb') as f:
        pickle.dump(model, f)

    return poly_coeffs

예제 #13

0

파일 보기

 def test_get_coauthorship_transactions(self):
   """ Tests the function get_coauthorship_transactions.

   truth[i] holds the expected transactions for references[i]: one list
   of names per reference of that group.
   """
   references = pre.read_data(self.testfilename)
   truth = [
     [
       ['m jones', 'e rundensteiner', 'y huang'],
       ['matthew c jones', 'e rundensteiner', 'h kuno', 'p marron',
        'v taube', 'y ra'],
       ['matthew c jones', 'e rundensteiner', 'y huang'],
     ],
     [
       ['mike w miller', 'l berg'],
       ['mike w miller'],
     ],
     [
       ['c chen', 'd kung', 'j samuel', 'j gao', 'p hsia', 'y toyoshima'],
     ],
     [
       ['jane j robinson'],
       ['jane j robinson'],
     ],
     [
       ['a gupta', 'a gonzalez', 'a hamid', 'c overstreet', 'h wahab',
        'j wild', 'k maly', 's ghanem', 'x zhu'],
     ],
     [
       ['mary d brown', 'y patt'],
     ],
   ]
   # enumerate keeps the index-based pairing of the original loop, so a
   # group without an expected entry still raises IndexError.
   for i, group in enumerate(references):
     transactions = mod.get_coauthorship_transactions(group)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(transactions, truth[i])

예제 #14

0

파일 보기

 def test_soft_tfidf(self):
   """ Tests the soft tfidf.

   The expected score is the sum, over three word pairs, of the product of
   both normalized tfidf scores and the Levenshtein ratio of the pair.
   """
   references = pre.read_data(self.testfilename)
   corpus = pre.get_corpus(references) + ['matt', 'brow']
   idf = mod.get_idf(corpus)
   name_a = 'matt huang kuno jones brow'
   words_a = mod.get_words(name_a)
   name_b = 'matthew jones c marron brown'
   words_b = mod.get_words(name_b)
   s_tfidf = mod.soft_tfidf(name_a, name_b, idf)
   # Terms are kept in their original order and grouping so the floating
   # point result is unchanged.
   truth = (
     mod.norm_tfidf('matt', words_a, idf)
         * mod.norm_tfidf('matthew', words_b, idf)
         * Levenshtein.ratio('matt', 'matthew')
     + mod.norm_tfidf('jones', words_a, idf)
         * mod.norm_tfidf('jones', words_b, idf)
         * Levenshtein.ratio('jones', 'jones')
     + mod.norm_tfidf('brow', words_a, idf)
         * mod.norm_tfidf('brown', words_b, idf)
         * Levenshtein.ratio('brow', 'brown')
   )
   # assertEqual replaces the deprecated assertEquals alias.
   self.assertEqual(s_tfidf, truth)

예제 #15

0

파일 보기

파일: test_prob_ranking.py 프로젝트: lucianamaroun/probabilistic-ranking

 def setUp(self):
     """Read the references and set the base and alternative partitionings."""
     super(RankingSampleTestCase, self).setUp()
     # Each partitioning is a list of six integer lists.
     self.base_partitioning = [
         [0, 0, 1], [0, 1], [0], [0, 0], [0], [0],
     ]
     self.alt_partitionings = [
         [[0, 1, 1], [1, 0], [0], [0, 0], [0], [0]],
         [[1, 0, 1], [0, 1], [0], [0, 0], [0], [0]],
     ]
     self.references = pre.read_data(self.testfilename)

예제 #16

0

파일 보기

파일: test_prob_ranking.py 프로젝트: lucianamaroun/probabilistic-ranking

 def setUp(self):
   """Read the references and set the base and alternative partitionings."""
   super(RankingSampleTestCase, self).setUp()
   # Each partitioning is a list of six integer lists.
   self.base_partitioning = [
     [0, 0, 1], [0, 1], [0], [0, 0], [0], [0],
   ]
   self.alt_partitionings = [
     [[0, 1, 1], [1, 0], [0], [0, 0], [0], [0]],
     [[1, 0, 1], [0, 1], [0], [0, 0], [0], [0]],
   ]
   self.references = pre.read_data(self.testfilename)

예제 #17

0

파일 보기

 def test_get_coauthorship_rules(self):
   """ Tests the function get_coauthorship_rules.

   truth[i] is the expected result for references[i]: a dict mapping an
   author to a dict of co-author -> rule value. Values are compared after
   rounding to 4 decimal places, in both directions (every rule returned
   must be expected, and every expected rule must be returned).
   """
   references = pre.read_data(self.testfilename)
   truth = [
     {
       'm jones': {'e rundensteiner': 1.0, 'y huang': 1.0},
       'e rundensteiner': {
         'm jones': 1.0/3, 'y huang': 2.0/3, 'matthew c jones': 2.0/3,
         'h kuno': 1.0/3, 'p marron': 1.0/3, 'v taube': 1.0/3,
         'y ra': 1.0/3},
       'y huang': {
         'm jones': 1.0/2, 'e rundensteiner': 1.0,
         'matthew c jones': 1.0/2},
       'matthew c jones': {
         'e rundensteiner': 1.0, 'y huang': 1.0/2, 'h kuno': 1.0/2,
         'p marron': 1.0/2, 'v taube': 1.0/2, 'y ra': 1.0/2},
       'h kuno': {
         'e rundensteiner': 1.0, 'matthew c jones': 1.0, 'p marron': 1.0,
         'v taube': 1.0, 'y ra': 1.0},
       'p marron': {
         'e rundensteiner': 1.0, 'matthew c jones': 1.0, 'h kuno': 1.0,
         'v taube': 1.0, 'y ra': 1.0},
       'v taube': {
         'e rundensteiner': 1.0, 'matthew c jones': 1.0, 'h kuno': 1.0,
         'p marron': 1.0, 'y ra': 1.0},
       'y ra': {
         'e rundensteiner': 1.0, 'matthew c jones': 1.0, 'h kuno': 1.0,
         'p marron': 1.0, 'v taube': 1.0},
     },
     {
       'mike w miller': {'l berg': 1.0/2},
       'l berg': {'mike w miller': 1.0},
     },
     {
       'c chen': {
         'd kung': 1.0, 'j samuel': 1.0, 'j gao': 1.0, 'p hsia': 1.0,
         'y toyoshima': 1.0},
       'd kung': {
         'c chen': 1.0, 'j samuel': 1.0, 'j gao': 1.0, 'p hsia': 1.0,
         'y toyoshima': 1.0},
       'j samuel': {
         'c chen': 1.0, 'd kung': 1.0, 'j gao': 1.0, 'p hsia': 1.0,
         'y toyoshima': 1.0},
       'j gao': {
         'c chen': 1.0, 'd kung': 1.0, 'j samuel': 1.0, 'p hsia': 1.0,
         'y toyoshima': 1.0},
       'p hsia': {
         'c chen': 1.0, 'd kung': 1.0, 'j samuel': 1.0, 'j gao': 1.0,
         'y toyoshima': 1.0},
       'y toyoshima': {
         'c chen': 1.0, 'd kung': 1.0, 'j samuel': 1.0, 'j gao': 1.0,
         'p hsia': 1.0},
     },
     # No rules are expected for this group.
     {},
     {
       'a gupta': {
         'a gonzalez': 1.0, 'a hamid': 1.0, 'c overstreet': 1.0,
         'h wahab': 1.0, 'j wild': 1.0, 'k maly': 1.0, 's ghanem': 1.0,
         'x zhu': 1.0},
       'a gonzalez': {
         'a gupta': 1.0, 'a hamid': 1.0, 'c overstreet': 1.0,
         'h wahab': 1.0, 'j wild': 1.0, 'k maly': 1.0, 's ghanem': 1.0,
         'x zhu': 1.0},
       'a hamid': {
         'a gupta': 1.0, 'a gonzalez': 1.0, 'c overstreet': 1.0,
         'h wahab': 1.0, 'j wild': 1.0, 'k maly': 1.0, 's ghanem': 1.0,
         'x zhu': 1.0},
       'c overstreet': {
         'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
         'h wahab': 1.0, 'j wild': 1.0, 'k maly': 1.0, 's ghanem': 1.0,
         'x zhu': 1.0},
       'h wahab': {
         'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
         'c overstreet': 1.0, 'j wild': 1.0, 'k maly': 1.0,
         's ghanem': 1.0, 'x zhu': 1.0},
       'j wild': {
         'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
         'c overstreet': 1.0, 'h wahab': 1.0, 'k maly': 1.0,
         's ghanem': 1.0, 'x zhu': 1.0},
       'k maly': {
         'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
         'c overstreet': 1.0, 'h wahab': 1.0, 'j wild': 1.0,
         's ghanem': 1.0, 'x zhu': 1.0},
       's ghanem': {
         'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
         'c overstreet': 1.0, 'h wahab': 1.0, 'j wild': 1.0,
         'k maly': 1.0, 'x zhu': 1.0},
       'x zhu': {
         'a gupta': 1.0, 'a gonzalez': 1.0, 'a hamid': 1.0,
         'c overstreet': 1.0, 'h wahab': 1.0, 'j wild': 1.0,
         'k maly': 1.0, 's ghanem': 1.0},
     },
     {
       'mary d brown': {'y patt': 1.0},
       'y patt': {'mary d brown': 1.0},
     },
   ]
   # enumerate keeps the index-based pairing of the original loop.
   for i, group in enumerate(references):
     rules = mod.get_coauthorship_rules(group)
     expected = truth[i]
     # Every returned rule must match the expected value ...
     for author_a in rules:
       for author_b in rules[author_a]:
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(round(rules[author_a][author_b], 4),
             round(expected[author_a][author_b], 4))
     # ... and every expected rule must have been returned.
     for author_a in expected:
       for author_b in expected[author_a]:
         self.assertEqual(round(rules[author_a][author_b], 4),
             round(expected[author_a][author_b], 4))

예제 #18

0

파일 보기

from PyTango import DeviceProxy

sys.path.append("../../")


from src.preprocessing import read_data, extract_hr_acc

if __name__ == '__main__':

    proxy = DeviceProxy("C3/hr_monitor/1")

    dirname = os.path.dirname(__file__)

    filename = os.path.join(dirname, 'dataset.dat')
    print(filename)
    data = extract_hr_acc(read_data(filename))
    data = data.resample('1000L')

    DP = namedtuple("DP", ["timestamp", "hr", "acc_x", "acc_y", "acc_z"])
    i = 0
    prow, ptimestamp = None, None
    for index, row in data.iterrows():
        timestamp = float(index.to_datetime().strftime('%s.%f'))
        progress = (i / len(data)) * 100
        if prow is not None:
            datapoint = DP(timestamp=ptimestamp, hr=prow['hr'],
                           acc_x=prow['acc_x'], acc_y=prow['acc_y'],
                           acc_z=prow['acc_z'])
            print("%s - %s, %s - %s%%" % (datetime.now(), index.to_datetime(),
                                          datapoint.hr, progress))
            proxy.register_datapoint(datapoint)