コード例 #1
0
    def __init__(self, params, result):
        """Set up the read cursor and the data transformer.

        Args:
            params: Not used in the visible part of this constructor.
            result: Not used in the visible part of this constructor.
        """
        # Position of the current image.
        self._cur = 0
        self.transformer = DataTransformer()

        # NOTE(review): self.list_samples is not assigned in this excerpt;
        # it has to be provided elsewhere before this line runs -- confirm.
        # print(...) with a single argument works on both Python 2 and 3,
        # unlike the print statement, which is a SyntaxError on Python 3.
        print("BatchLoader initialized with {} images".format(
            len(self.list_samples)))
コード例 #2
0
__author__ = "Haoxiang Ma"

import DataTransformer
import IndexCreator
import sys
import os

if __name__ == "__main__":

    # Command line: argv[1] is the document folder, argv[2] is the number
    # of files to process.
    folderName = str(sys.argv[1])
    if not folderName.endswith("/"):
        # Paths below are built by plain concatenation, so the folder name
        # must end with a separator.
        folderName = folderName + "/"
    numFilesToProcess = int(sys.argv[2])
    fileNames = os.listdir(folderName)

    # Load the documents as (documentID, content, documentName) tuples.
    # IDs start at 1; zip stops at whichever runs out first, the ID range
    # or the directory listing.
    documentList = []
    documentIDs = range(1, numFilesToProcess + 1)
    for documentID, documentName in zip(documentIDs, fileNames):
        with open(folderName + documentName) as handle:
            content = handle.read()
        documentList.append((documentID, content, documentName))

    # Step 1: transform the text data.
    transformer = DataTransformer.DataTransformer(documentList, folderName)
    transformer.transform()

    # Step 2: build the index files from the transformed data held by the
    # transformer.
    indexCreator = IndexCreator.IndexCreator(transformer)
    indexCreator.createTermIDFile()
    indexCreator.createDocumentIDFile()
    indexCreator.createInvertedIndex()
コード例 #3
0
    def __init__(self, dmdf):
        """Preprocess the input and create the helper objects.

        ``dmdf`` is run through ``self.preprocess`` with the arguments
        ``1400`` and ``5``; the result is kept on ``self.dmdf``. A
        ``DataTransformer`` and a ``DataNormalizer`` are then created and
        stored on the instance.
        """
        preprocessed = self.preprocess(dmdf, 1400, 5)
        self.dmdf = preprocessed

        self.transformer = DataTransformer()
        self.normalizer = DataNormalizer()
コード例 #4
0
class DataManager:
    """Filter a battle DataFrame and build an analytics base table from it.

    The constructor preprocesses the frame once; the base table is then
    produced by ``create_analytics_base_table``.
    """

    def __init__(self, dmdf):
        """Preprocess ``dmdf`` and create the transformer/normalizer helpers.

        Args:
            dmdf: pandas DataFrame. ``preprocess`` reads its ``elo``,
                ``battle_url`` and ``num_turns`` columns.
        """
        self.dmdf = self.preprocess(dmdf, 1400, 5)
        self.transformer = DataTransformer()
        self.normalizer = DataNormalizer()

    def preprocess(self, dmdf, elo, turns):
        """Return a de-duplicated, filtered copy of ``dmdf``.

        Args:
            dmdf: pandas DataFrame with ``elo``, ``battle_url`` and
                ``num_turns`` columns. The caller's frame is not modified.
            elo: Minimum ``elo`` (inclusive) a row needs to be kept.
            turns: Rows are kept only when ``num_turns`` is strictly
                greater than this value.

        Returns:
            A new DataFrame, ordered by ``elo`` descending, without the
            ``battle_url`` column.
        """
        # sort_values returns a new frame; the result has to be kept,
        # otherwise keep='first' below does not select the highest-elo row
        # for each battle_url.
        dmdf = dmdf.sort_values('elo', ascending=False)
        dmdf = dmdf.drop_duplicates(subset='battle_url', keep='first')
        dmdf = dmdf.drop(['battle_url'], axis=1)
        dmdf = dmdf.dropna()
        dmdf = dmdf[dmdf.elo != 2019]  # Should maybe not do this
        dmdf = dmdf[dmdf.elo >= elo]
        # Honour the ``turns`` argument instead of a hard-coded 5.
        dmdf = dmdf[dmdf.num_turns > turns]
        return dmdf

    def create_analytics_base_table(self):
        """Transform the preprocessed frame, then normalize the result."""
        return self.normalizer.normalize(self.transformer.transform(self.dmdf))

# Script entry point. The guard must compare with ``==``; a single ``=``
# is a SyntaxError and prevents the module from being imported at all.
if __name__ == "__main__":
    data = pd.read_csv('battle_data.csv')
    # Runs the whole pipeline; despite the name, ``dm`` holds the value
    # returned by create_analytics_base_table(), not the DataManager.
    dm = DataManager(data).create_analytics_base_table()

    # Introspect DataTransformer: list every callable attribute whose name
    # does not start with a double underscore, then its signature and type.
    dt = DataTransformer()
    public_callables = [
        func for func in dir(dt)
        if callable(getattr(dt, func)) and not func.startswith("__")
    ]
    print(public_callables)
    print([str(inspect.signature(getattr(dt, func))) for func in public_callables])
    print([type(getattr(dt, func)) for func in public_callables])

コード例 #5
0
 def test_get_highest_speed_flag(self):
     """get_highest_speed_flag should return 'TIE' for these two rosters."""
     # Both roster strings contain 'Voltorb' -- presumably the fastest
     # member on each side, hence the expected tie (TODO confirm).
     roster_one = 'Pikachu, Voltorb'
     roster_two = 'Bulbasaur, Voltorb'
     expected = 'TIE'

     transformer = DataTransformer()
     actual = transformer.get_highest_speed_flag(roster_one, roster_two)
     self.assertEqual(actual, expected)