def __init__(self, params, result):
    """Reset the image cursor, create the transformer and report the sample count.

    Args:
        params: Not read by the statements visible in this snippet.
        result: Not read by the statements visible in this snippet.
    """
    # initialize
    self._cur = 0  # current image
    self.transformer = DataTransformer()
    # NOTE(review): ``self.list_samples`` is not assigned in the visible part
    # of this method; presumably it is set elsewhere -- confirm.
    # The single-argument call form prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print("BatchLoader initialized with {} images".format(
        len(self.list_samples)))
__author__ = "Haoxiang Ma"

import DataTransformer
import IndexCreator
import sys
import os


def _load_documents(folderName, numFilesToProcess):
    """Read documents from ``folderName`` as (documentID, content, documentName).

    At most ``numFilesToProcess`` directory entries are read; document IDs
    start at 1 and follow the order returned by ``os.listdir``.
    """
    documentList = []
    fileNames = os.listdir(folderName)
    # zip() stops at the shorter input, so fewer files than requested is fine.
    for i, fileName in zip(range(1, numFilesToProcess + 1), fileNames):
        with open(os.path.join(folderName, fileName)) as document:
            documentList.append((i, document.read(), fileName))
    return documentList


def main(argv):
    """Transform the documents in a folder and build the index files.

    Args:
        argv: Command-line vector; ``argv[1]`` is the folder holding the
            documents and ``argv[2]`` is the number of files to process.
    """
    if len(argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: {} <folderName> <numFilesToProcess>".format(argv[0]))

    # set parameters
    folderName = str(argv[1])
    # The transformer receives the folder name with a trailing slash.
    if not folderName.endswith("/"):
        folderName = folderName + "/"
    numFilesToProcess = int(argv[2])

    # read documents from local, record documentID, content, and documentName
    documentList = _load_documents(folderName, numFilesToProcess)

    # firstly, transform the text data
    d = DataTransformer.DataTransformer(documentList, folderName)
    d.transform()

    # secondly, create index by using the transformed data in the transformer
    creator = IndexCreator.IndexCreator(d)
    creator.createTermIDFile()
    creator.createDocumentIDFile()
    creator.createInvertedIndex()


if __name__ == "__main__":
    main(sys.argv)
def __init__(self, dmdf):
    """Keep the preprocessed frame and create the transformer and normalizer.

    Args:
        dmdf: Raw data handed to ``self.preprocess`` together with the
            fixed arguments ``1400`` and ``5``.
    """
    cleaned = self.preprocess(dmdf, 1400, 5)
    self.dmdf = cleaned
    self.transformer = DataTransformer()
    self.normalizer = DataNormalizer()
class DataManager:
    """Filter raw battle data and run it through the transformer and normalizer."""

    def __init__(self, dmdf):
        """Preprocess ``dmdf`` and create the transformer and normalizer.

        Args:
            dmdf: DataFrame providing at least the ``battle_url``, ``elo``
                and ``num_turns`` columns read by :meth:`preprocess`.
        """
        self.dmdf = self.preprocess(dmdf, 1400, 5)
        self.transformer = DataTransformer()
        self.normalizer = DataNormalizer()

    def preprocess(self, dmdf, elo, turns):
        """Deduplicate and filter the raw battle rows.

        Args:
            dmdf: DataFrame with ``battle_url``, ``elo`` and ``num_turns``
                columns.
            elo: Minimum ``elo`` (inclusive) a row needs to be kept.
            turns: Rows are kept only when ``num_turns`` is strictly greater
                than this value.

        Returns:
            A new DataFrame without the ``battle_url`` column, holding one
            row per battle (the highest-``elo`` one), with rows containing
            missing values removed and the filters above applied.
        """
        # sort_values returns a new frame; rebind it so that keep='first'
        # below retains the highest-elo row of each battle_url.
        dmdf = dmdf.sort_values('elo', ascending=False)
        dmdf = dmdf.drop_duplicates(subset='battle_url', keep='first')
        dmdf = dmdf.drop(['battle_url'], axis=1)
        dmdf = dmdf.dropna()
        dmdf = dmdf[dmdf.elo != 2019]  # Should maybe not do this
        dmdf = dmdf[dmdf.elo >= elo]
        # Honour the caller-supplied threshold rather than a hard-coded 5.
        dmdf = dmdf[dmdf.num_turns > turns]
        return dmdf

    def create_analytics_base_table(self):
        """Return the preprocessed frame after transformation and normalization."""
        return self.normalizer.normalize(self.transformer.transform(self.dmdf))


if __name__ == "__main__":
    data = pd.read_csv('battle_data.csv')
    dm = DataManager(data).create_analytics_base_table()
    dt = DataTransformer()
    # Public (non-dunder) callables of the transformer, computed once and
    # reported three ways: names, signatures and types.
    public_names = [func for func in dir(dt)
                    if callable(getattr(dt, func)) and not func.startswith("__")]
    print(public_names)
    print([str(inspect.signature(getattr(dt, func))) for func in public_names])
    print([type(getattr(dt, func)) for func in public_names])
def test_get_highest_speed_flag(self):
    """Expect 'TIE' for two rosters that both contain Voltorb.

    NOTE(review): presumably the tie arises because the fastest member is
    shared by both rosters -- confirm against DataTransformer.
    """
    roster_one = 'Pikachu, Voltorb'
    roster_two = 'Bulbasaur, Voltorb'
    transformer = DataTransformer()
    flag = transformer.get_highest_speed_flag(roster_one, roster_two)
    self.assertEqual(flag, 'TIE')