def readCSV(self, csvPath):
    # Resolve csvPath relative to the current project's data folder.
    projPath = os.path.join(context.get_current_project_data_path(), csvPath)
    try:
        self.data = pd.read_csv(projPath)
    except (UnicodeDecodeError, OSError):
        try:
            # Retry with the cp1251 (Cyrillic) code page for non-UTF-8 files.
            self.data = pd.read_csv(projPath, encoding="cp1251")
        except (UnicodeDecodeError, OSError):
            # Fall back to treating csvPath as a path outside the project.
            self.data = pd.read_csv(csvPath)
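The interesting part here is the encoding fallback. A self-contained sketch of the same pattern, with the project-context lookup left out (the function name is mine):

import pandas as pd

def read_csv_any_encoding(path: str) -> pd.DataFrame:
    """Read a CSV, retrying with cp1251 when the file is not valid UTF-8."""
    try:
        return pd.read_csv(path)
    except UnicodeDecodeError:
        return pd.read_csv(path, encoding="cp1251")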
Example #2
def addPath(self, imagePath):
    # Prefer the path inside the project data folder; fall back to imagePath.
    p0 = os.path.join(context.get_current_project_data_path(), imagePath)
    if not os.path.exists(p0):
        p0 = imagePath
    for x in os.listdir(p0):
        fp = os.path.join(p0, x)
        # Index each file both with and without its 4-character extension.
        self.images[x] = fp
        self.images[x[:-4]] = fp
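Note that x[:-4] only strips a dot plus a three-letter extension. If extension lengths can vary, os.path.splitext is the safer variant; a minimal sketch (the function name is mine):

import os

def index_images(folder: str) -> dict:
    """Map both 'name.ext' and the bare 'name' to the file's full path."""
    images = {}
    for entry in os.listdir(folder):
        full = os.path.join(folder, entry)
        images[entry] = full
        images[os.path.splitext(entry)[0]] = full  # key without extension
    return images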
Example #3

def from_csv(path,
             targetColumn: str,
             featureColumn: str,
             idColumn: str = None,
             sep=",",
             absPath=False):
    # Unless absPath is set, resolve path inside the project data folder.
    if not absPath:
        path = os.path.join(get_current_project_data_path(), path)
    return CSVDataSet(path, targetColumn, featureColumn, idColumn, sep)
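A hedged usage example; the file and column names below are illustrative, not from the original project:

# ds = from_csv("reviews.csv", targetColumn="label",
#               featureColumn="text", idColumn="id")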
Example #4
def translate(sentence: str):
    # The vocabulary maps prediction indices back to words (i2w).
    file_path = os.path.join(context.get_current_project_data_path(),
                             "rus.vocab")
    vocabulary = utils.load(file_path)
    preds = generic.parse('eng_to_ru').predictions(
        builtin_datasets.from_array([sentence], ['']))
    for item in preds:
        rootItem = item.rootItem()
        # Greedy decoding: take the highest-scoring word at every step.
        sentence = ''
        for indices in item.prediction:
            sentence = sentence + " " + vocabulary.i2w[np.argmax(indices)]
        print(rootItem.x + " " + sentence)
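The inner loop is a greedy argmax decode. Factored out on its own, assuming item.prediction is a sequence of per-step score vectors and i2w an index-to-word mapping (the function name is mine):

import numpy as np

def greedy_decode(prediction, i2w) -> str:
    """Pick the highest-scoring vocabulary entry at each output step."""
    return " ".join(i2w[int(np.argmax(step))] for step in prediction)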
Example #5

def readSettings(self, csvPath) -> dict:
    if os.path.isabs(csvPath):
        absPath = csvPath
    else:
        absPath = os.path.join(context.get_current_project_data_path(),
                               csvPath)

    # The dataset description lives next to the CSV as a hidden YAML file
    # named ".<csv file name>.dataset_desc".
    fDir = os.path.dirname(absPath)
    fName = "." + os.path.basename(absPath) + ".dataset_desc"
    settingsPath = os.path.join(fDir, fName)
    return load_yaml(settingsPath)
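The sidecar naming convention is easy to miss, so here it is in isolation (the function name is mine):

import os

def settings_path_for(csv_path: str) -> str:
    """Return the '.<name>.dataset_desc' sidecar path next to the CSV."""
    return os.path.join(os.path.dirname(csv_path),
                        "." + os.path.basename(csv_path) + ".dataset_desc")

# settings_path_for("/data/train.csv") == "/data/.train.csv.dataset_desc"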
Example #6
def make_predictions():
    experiments = projects.Project(Path(__file__).parent.parent).experiments()
    # The vocabulary is experiment-independent, so load it once up front.
    file_path = os.path.join(context.get_current_project_data_path(),
                             "rus.vocab")
    vocabulary = utils.load(file_path)
    for exp in experiments:
        if exp.isCompleted():
            preds = generic.parse(exp.path).predictions('test')
            for item in preds:
                rootItem = item.rootItem()
                sentence = ''
                for indices in item.prediction:
                    sentence = sentence + " " + vocabulary.i2w[np.argmax(indices)]
                print(rootItem.x + " " + sentence)
Example #7

def addPath(self, imagePath):
    p0 = os.path.join(context.get_current_project_data_path(), imagePath)
    if not os.path.exists(p0):
        p0 = imagePath
    # The folder name doubles as the class label.
    nm = os.path.basename(p0)
    for x in os.listdir(p0):
        if x[-4:] in (".jpg", ".png", ".gif"):
            fp = os.path.join(p0, x)
            self.images[x] = fp
            self.data["ImageId"].append(x)
            self.data["Clazz"].append(nm)
            self.images[x[:-4]] = fp
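Since the class label comes from the directory name, the same idea extends to a whole root of class folders. A sketch that builds the ImageId/Clazz table from the snippet for every subfolder at once (the function name is mine):

import os
import pandas as pd

def scan_class_folders(root: str) -> pd.DataFrame:
    """One row per image; the containing folder's name is the class label."""
    rows = []
    for clazz in os.listdir(root):
        folder = os.path.join(root, clazz)
        if not os.path.isdir(folder):
            continue
        for name in os.listdir(folder):
            if name.lower().endswith((".jpg", ".png", ".gif")):
                rows.append({"ImageId": name, "Clazz": clazz})
    return pd.DataFrame(rows)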
Example #8
def load_docs(self, path, encoding, num2Class):
    fp = os.path.join(context.get_current_project_data_path(), path)
    if os.path.isdir(fp):
        # Directory: recurse into every .txt file it contains.
        files = os.listdir(fp)
        for q in tqdm.tqdm(files, "loading files"):
            if q.endswith(".txt"):
                self.load_docs(os.path.join(fp, q), encoding, num2Class)
        return
    csen = Sentence()
    cdoc = Doc()
    csen.doc = cdoc
    cdoc.num = 0
    dnum = 0
    with open(fp, encoding=encoding) as file:
        line = file.readline()
        while line:
            # __process reports sentence/document boundaries plus token fields.
            sc, dc, fields = self.__process(line)
            if len(fields) > 0 and len(fields[0]) > 0:
                if not sc and not dc:
                    tc = Token(fields[0], fields[1:])
                    # Collect the set of values seen in each field column.
                    for x in range(len(tc.fields)):
                        vm = tc.fields[x]
                        if x not in num2Class:
                            num2Class[x] = set()
                        num2Class[x].add(vm)

                    csen.tokens.append(tc)
            if sc:
                # Sentence boundary: flush the current sentence.
                if len(csen.tokens) > 0:
                    self.sentences.append(csen)
                    cdoc.sentences.append(csen)
                    csen = Sentence()
                    csen.doc = cdoc
            if dc:
                # Document boundary: flush the current document.
                if len(cdoc.sentences) > 0:
                    self.docs.append(cdoc)
                    dnum = dnum + 1
                    cdoc = Doc()
                    cdoc.num = dnum
            line = file.readline()

    # Flush whatever remains at end of file.
    if len(csen.tokens) > 0:
        self.sentences.append(csen)
        cdoc.sentences.append(csen)
    if len(cdoc.sentences) > 0:
        self.docs.append(cdoc)
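__process is not shown, but the flow is the usual CoNLL-style pattern: one token per line, whitespace-separated fields, blank lines separating sentences. A self-contained sketch under that assumption (names are mine):

def read_conll_sentences(path: str, encoding: str = "utf-8"):
    """Yield sentences as lists of (token, fields); a blank line ends one."""
    sentence = []
    with open(path, encoding=encoding) as f:
        for line in f:
            parts = line.split()
            if not parts:  # blank line: sentence boundary
                if sentence:
                    yield sentence
                    sentence = []
            else:
                sentence.append((parts[0], parts[1:]))
    if sentence:
        yield sentence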
Example #9

def addPath(self, imagePath):
    current_project_data_path = context.get_current_project_data_path()

    # Debug output showing how the path gets resolved.
    print("addPath context path: " + current_project_data_path)
    print("addPath image_path: " + imagePath)

    p0 = os.path.join(current_project_data_path, imagePath)
    print("p0: " + p0)

    if not os.path.exists(p0):
        p0 = imagePath
    for x in os.listdir(p0):
        fp = os.path.join(p0, x)
        self.images[x] = fp
        self.images[x[:-4]] = fp
Example #10
# def getBengali1():
#     return image_datasets.MultiOutputClassClassificationDataSet("bengali/train_1", "bengali/train.csv", 'image_id', ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic'])

@datasets.dataset_provider(origin="test.csv", kind="MultiClassificationDataset")
def getBengaliTest0():
    return image_datasets.MultiOutputClassClassificationDataSet(
        "bengali/test_0", "bengali/test_flat.csv", 'image_id',
        ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic'])

# @datasets.dataset_provider(origin="train.csv", kind="GenericDataSet")
# def getBengali0_small():
#     return image_datasets.MultiOutputClassClassificationDataSet("bengali/train_0", "bengali/train.csv", 'image_id', ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic'], len=10000)

import pandas as pd
import tqdm
import numpy as np

p = context.get_current_project_data_path()
# Bengali.AI handwritten-grapheme images are stored flattened at 137x236.
HEIGHT = 137
WIDTH = 236


@datasets.dataset_provider(origin="test.csv", kind="MultiClassificationDataset")
def getData1():
    ds = pd.read_csv(f"{p}/bengaliai-cv19/train.csv")
    gr = ds["grapheme_root"].values
    vd = ds["vowel_diacritic"].values
    cd = ds["consonant_diacritic"].values
    for i in range(1):  # only the first of the train_image_data_{0..3} shards
        df = pd.read_parquet(f"{p}/bengaliai-cv19/train_image_data_{i}.parquet")
        # Column 0 is the image id; the rest are flattened pixel values.
        data0 = df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)

        class M(datasets.DataSet):
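The reshape is the core of the loader: each parquet row holds one image id followed by 137*236 flattened pixels. The same conversion in isolation (the function name is mine):

import numpy as np
import pandas as pd

HEIGHT, WIDTH = 137, 236

def parquet_to_images(path: str) -> np.ndarray:
    """Rows of [image_id, pixel_0, ..., pixel_n] -> (N, HEIGHT, WIDTH) uint8."""
    df = pd.read_parquet(path)
    return df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)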
Example #11
def __init__(self, path: str):
    inp_file = os.path.join(context.get_current_project_data_path(), path)
    # Each pair is (source sentence, target sentence).
    data = to_pairs(load_doc(inp_file))
    self.src = [x[0] for x in data]
    self.dest = [x[1] for x in data]
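load_doc and to_pairs are not part of the snippet; for a tab-separated bilingual corpus they could look like this (a sketch under that assumption, names mine):

def load_doc(path: str) -> str:
    # Assumption: the corpus is a plain UTF-8 text file.
    with open(path, encoding="utf-8") as f:
        return f.read()

def to_pairs(doc: str):
    # Assumption: one "source<TAB>target" pair per line.
    return [line.split("\t")[:2] for line in doc.strip().split("\n")]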