def readCSV(self, csvPath):
    # Try the path relative to the project data folder first; fall back to
    # cp1251 encoding, and finally to the path as given.
    try:
        self.data = pd.read_csv(
            os.path.join(context.get_current_project_data_path(), csvPath))
    except Exception:
        try:
            self.data = pd.read_csv(
                os.path.join(context.get_current_project_data_path(), csvPath),
                encoding="cp1251")
        except Exception:
            self.data = pd.read_csv(csvPath)
def addPath(self, imagePath):
    p0 = os.path.join(context.get_current_project_data_path(), imagePath)
    if not os.path.exists(p0):
        p0 = imagePath
    ld0 = os.listdir(p0)
    for x in ld0:
        fp = os.path.join(p0, x)
        self.images[x] = fp
        self.images[x[:-4]] = fp
def from_csv(path, targetColumn: str, featureColumn: str, idColumn: str = None, sep=",", absPath=False):
    if not absPath:
        path = os.path.join(get_current_project_data_path(), path)
    return CSVDataSet(path, targetColumn, featureColumn, idColumn, sep)
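# Hedged usage sketch (not part of the original module): the file and column
# names below ("reviews.csv", "sentiment", "text") are placeholders that only
# illustrate how from_csv resolves a relative path against the current
# project's data folder before building a CSVDataSet.
def _example_from_csv():
    return from_csv("reviews.csv", targetColumn="sentiment", featureColumn="text")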
def translate(sentence: str):
    file_path = os.path.join(context.get_current_project_data_path(), "rus.vocab")
    vocabulary = utils.load(file_path)
    preds = generic.parse('eng_to_ru').predictions(
        builtin_datasets.from_array([sentence], ['']))
    for item in preds:
        rootItem = item.rootItem()
        sentence = ''
        for indices in item.prediction:
            # Map each prediction vector back to a vocabulary word.
            sentence = sentence + " " + vocabulary.i2w[np.argmax(indices)]
        print(rootItem.x + " " + sentence)
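# Hedged usage note for translate(): the call below uses a placeholder
# sentence; the printed output depends on the trained 'eng_to_ru' experiment
# and the rus.vocab vocabulary stored in the project data folder.
#     translate("how are you")
# prints the source sentence followed by the decoded target-language words.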
def readSettings(self, csvPath) -> dict:
    if os.path.isabs(csvPath):
        absPath = csvPath
    else:
        absPath = os.path.join(context.get_current_project_data_path(), csvPath)
    fDir = os.path.dirname(absPath)
    fName = "." + os.path.basename(absPath) + ".dataset_desc"
    settingsPath = os.path.join(fDir, fName)
    settingsObj = load_yaml(settingsPath)
    return settingsObj
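# Hedged illustration of the sidecar naming used by readSettings (the file
# name is a placeholder): for "train.csv" under the project data folder, the
# YAML descriptor is expected at
#     <project_data_path>/.train.csv.dataset_desc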
def make_predictions():
    experiments = projects.Project(Path(__file__).parent.parent).experiments()
    for exp in experiments:
        if exp.isCompleted():
            file_path = os.path.join(context.get_current_project_data_path(), "rus.vocab")
            vocabulary = utils.load(file_path)
            preds = generic.parse(exp.path).predictions('test')
            for item in preds:
                rootItem = item.rootItem()
                sentence = ''
                for indices in item.prediction:
                    sentence = sentence + " " + vocabulary.i2w[np.argmax(indices)]
                print(rootItem.x + " " + sentence)
def addPath(self, imagePath):
    p0 = os.path.join(context.get_current_project_data_path(), imagePath)
    if not os.path.exists(p0):
        p0 = imagePath
    ld0 = os.listdir(p0)
    # The directory's basename is used as the class label for every image in it.
    nm = os.path.basename(p0)
    for x in ld0:
        ext = x[-4:]
        if ext in (".jpg", ".png", ".gif"):
            fp = os.path.join(p0, x)
            self.images[x] = fp
            self.data["ImageId"].append(x)
            self.data["Clazz"].append(nm)
            self.images[x[:-4]] = fp
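# Hedged example of the layout this addPath variant expects (folder and file
# names are placeholders): the directory's basename becomes the class label,
# and every .jpg/.png/.gif inside it is registered under that label.
#
#     <project_data_path>/train/cats/
#         img001.jpg
#         img002.png
#
# self.addPath("train/cats") would record both images with Clazz == "cats".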
def load_docs(self, path, encoding, num2Class):
    fp = os.path.join(context.get_current_project_data_path(), path)
    if os.path.isdir(fp):
        # Recurse into every .txt file found in the directory.
        files = os.listdir(fp)
        for q in tqdm.tqdm(files, "loading files"):
            if ".txt" in q[-4:]:
                self.load_docs(os.path.join(fp, q), encoding, num2Class)
        return
    csen = Sentence()
    cdoc = Doc()
    csen.doc = cdoc
    cdoc.num = 0
    dnum = 0
    with open(fp, encoding=encoding) as file:
        line = file.readline()
        while line:
            sc, dc, fields = self.__process(line)
            if len(fields) > 0 and len(fields[0]) > 0:
                if not sc and not dc:
                    tc = Token(fields[0], fields[1:])
                    # Collect the set of values seen for each field column.
                    for x in range(len(tc.fields)):
                        vm = tc.fields[x]
                        if x not in num2Class:
                            num2Class[x] = set()
                        num2Class[x].add(vm)
                    csen.tokens.append(tc)
            if sc:
                # Sentence boundary: flush the current sentence and start a new one.
                if len(csen.tokens) > 0:
                    self.sentences.append(csen)
                    cdoc.sentences.append(csen)
                csen = Sentence()
                csen.doc = cdoc
            if dc:
                # Document boundary: flush the current document and start a new one.
                if len(cdoc.sentences) > 0:
                    self.docs.append(cdoc)
                    dnum = dnum + 1
                cdoc = Doc()
                cdoc.num = dnum
            line = file.readline()
    # Flush whatever remains after the last line.
    if len(csen.tokens) > 0:
        self.sentences.append(csen)
        cdoc.sentences.append(csen)
    if len(cdoc.sentences) > 0:
        self.docs.append(cdoc)
def addPath(self, imagePath):
    current_project_data_path = context.get_current_project_data_path()
    print("addPath context path: " + current_project_data_path)
    print("addPath image_path: " + imagePath)
    p0 = os.path.join(current_project_data_path, imagePath)
    print("p0: " + p0)
    if not os.path.exists(p0):
        p0 = imagePath
    ld0 = os.listdir(p0)
    for x in ld0:
        fp = os.path.join(p0, x)
        self.images[x] = fp
        self.images[x[:-4]] = fp
# def getBengali1():
#     return image_datasets.MultiOutputClassClassificationDataSet(
#         "bengali/train_1", "bengali/train.csv", 'image_id',
#         ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic'])

@datasets.dataset_provider(origin="test.csv", kind="MultiClassificationDataset")
def getBengaliTest0():
    return image_datasets.MultiOutputClassClassificationDataSet(
        "bengali/test_0", "bengali/test_flat.csv", 'image_id',
        ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic'])

# @datasets.dataset_provider(origin="train.csv", kind="GenericDataSet")
# def getBengali0_small():
#     return image_datasets.MultiOutputClassClassificationDataSet(
#         "bengali/train_0", "bengali/train.csv", 'image_id',
#         ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic'], len=10000)

import pandas as pd
import tqdm
import numpy as np

p = context.get_current_project_data_path()

HEIGHT = 137
WIDTH = 236

@datasets.dataset_provider(origin="test.csv", kind="MultiClassificationDataset")
def getData1():
    ds = pd.read_csv(f"{p}/bengaliai-cv19/train.csv")
    gr = ds["grapheme_root"].values
    vd = ds["vowel_diacritic"].values
    cd = ds["consonant_diacritic"].values
    for i in range(1):
        df = pd.read_parquet(f"{p}/bengaliai-cv19/train_image_data_{i}.parquet")
        # Flattened pixel columns reshaped into HEIGHT x WIDTH grayscale images.
        data0 = df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)

    class M(datasets.DataSet):
def __init__(self, path: str):
    inp_file = os.path.join(context.get_current_project_data_path(), path)
    data = to_pairs(load_doc(inp_file))
    self.src = [x[0] for x in data]
    self.dest = [x[1] for x in data]