def get_en_dictionary() -> List[str]:
    """Return the English spellchecker word list, building its pickle cache on first use.

    The cache is expected at ``data/spellchecker/hunspell-en_US-2018/en_US.pkl``
    below the current working directory. When it is missing, ``en_US.dic`` from
    the same directory is parsed, the resulting list is pickled to the cache
    path and then returned.

    Returns:
        The list of dictionary entries.

    Raises:
        ValueError: If neither the cache nor the source ``.dic`` file exists.
    """
    base_dir = os.path.join(os.getcwd(), 'data', 'spellchecker',
                            'hunspell-en_US-2018')
    path = os.path.join(base_dir, 'en_US.pkl')

    if check_if_file_exists(path):
        # NOTE: pickle must only be used on trusted files; this cache is
        # written by this function itself.
        with open(path, 'rb') as f:
            dictionary = pickle.load(f)

        if dictionary:
            logger.info(
                f'Dictionary successfully unpickeld. Loaded {len(dictionary)} words'
            )
        return dictionary

    logger.info(
        f'Pre pickeled spellchecker dictionary does not exist at {path}.')

    # load source dict and process it
    src_dict_path = os.path.join(base_dir, 'en_US.dic')
    if not check_if_file_exists(src_dict_path):
        # The f-prefix was missing here, so the path was never interpolated.
        logger.error(
            f'Could not find source spellchecker file at path {src_dict_path}. Please download it from the website.'
        )
        raise ValueError(
            f'Source spellchecker file at path {src_dict_path} was not found. Please download it from the website.'
        )

    with open(src_dict_path, encoding='utf8') as input_file:
        # The first line is skipped (presumably the hunspell entry-count
        # header -- confirm against the .dic file in use).
        next(input_file, None)
        # Keep only the text before the first '/'; what follows is not part
        # of the word.
        dictionary = [
            line.split('/')[0].replace('\n', '') for line in input_file
        ]

    # save dict
    with open(path, 'wb') as f:
        pickle.dump(dictionary, f)
    logger.info(
        f'File successfully loaded and created. It is located at {path}')
    return dictionary
    def _try_load(self, name, fields):
        """Try to restore pickled examples and aspects for ``name`` from the cache.

        The cache directory ``data/data/cache`` (below the current working
        directory) is passed to ``create_dir_if_necessary`` before it is
        searched.

        Args:
            name: Prefix of the cache files (``<name>2.pkl`` and
                ``<name>_2aspects.pkl``).
            fields: Passed on to ``self._construct_fields``.

        Returns:
            ``([], None)`` when either cache file is missing; otherwise the
            unpickled examples and the result of ``self._construct_fields``.
            On a cache hit ``self.aspects`` is set as a side effect.
        """
        cache_dir = os.path.join(os.getcwd(), 'data', 'data', 'cache')
        create_dir_if_necessary(cache_dir)

        samples_file = os.path.join(cache_dir, name + "2.pkl")
        aspects_file = os.path.join(cache_dir, name + "_2aspects.pkl")

        # Both files are required; a partial cache counts as a miss.
        cache_complete = (check_if_file_exists(samples_file)
                          and check_if_file_exists(aspects_file))
        if not cache_complete:
            return [], None

        with open(samples_file, 'rb') as samples_handle:
            cached_examples = pickle.load(samples_handle)

        with open(aspects_file, 'rb') as aspects_handle:
            self.aspects = pickle.load(aspects_handle)

        # get all fields
        return cached_examples, self._construct_fields(fields)
def get_de_dictionary() -> List[str]:
    """Return the German spellchecker word list, building its pickle cache on first use.

    The cache is expected at ``data/spellchecker/de/de_DE.pkl`` below the
    current working directory. When it is missing, ``german.dic`` from the
    same directory is read line by line, the resulting list is pickled to the
    cache path and then returned.

    Returns:
        The list of dictionary entries.

    Raises:
        ValueError: If neither the cache nor the source ``.dic`` file exists.
    """
    base_dir = os.path.join(os.getcwd(), 'data', 'spellchecker', 'de')
    path = os.path.join(base_dir, 'de_DE.pkl')

    if check_if_file_exists(path):
        # NOTE: pickle must only be used on trusted files; this cache is
        # written by this function itself.
        with open(path, 'rb') as f:
            dictionary = pickle.load(f)

        if dictionary:
            logger.info(
                f'Dictionary successfully unpickeld. Loaded {len(dictionary)} words'
            )
        return dictionary

    logger.info(
        f'Pre pickeled spellchecker dictionary does not exist at {path}.')

    # load source dict and process it
    src_dict_path = os.path.join(base_dir, 'german.dic')
    if not check_if_file_exists(src_dict_path):
        # The f-prefix was missing here, so the path was never interpolated.
        logger.error(
            f'Could not find source spellchecker file at path {src_dict_path}. Please download it from the website. (https://sourceforge.net/projects/germandict/files/)'
        )
        raise ValueError(
            f'Source spellchecker file at path {src_dict_path} was not found. Please download it from the website. (https://sourceforge.net/projects/germandict/files/)'
        )

    # The file is read as bytes and decoded per line as ISO-8859-1; binary
    # mode splits on '\n' only, so '\r' has to be removed explicitly.
    with open(src_dict_path, 'rb') as input_file:
        dictionary = [
            raw_line.decode("iso-8859-1", errors='strict')
            .replace('\n', '')
            .replace('\r', '')
            for raw_line in input_file
        ]

    # save dict
    with open(path, 'wb') as f:
        pickle.dump(dictionary, f)
    logger.info(
        f'File successfully loaded and created. It is located at {path}')
    return dictionary
Example #4
0
    def restore_bow(self, path):
        """ Restores a specific bag of words by restoring its vocabulary.

        Keyword arguments:
        path -- location of the saved bag of words

        Returns:
        True if the bag of words was loaded, False if nothing exists at path
        """

        # print(...) with a single argument prints the same text on Python 2
        # and Python 3; the bare print statement is a SyntaxError on Python 3.
        if not utils.check_if_file_exists(path):
            print("Could not find bow. Path: {0}".format(path))
            return False

        # load() returns the restored object, which replaces the current one.
        self.bow = self.bow.load(path)
        self.bowTrained = True
        print("BOW successfully restored.")
        return True
Example #5
0
def get_model_dict():
    """ Loads and returns the model dictionary.

    Returns:
    the unpickled model dictionary, or an empty dict if the file does not exist
    """

    # load classifier dictionary
    path = utils.get_data_path() + "model_dictionary"

    # initialize if file doesn't exist
    if not utils.check_if_file_exists(path):
        return {}

    # Pickle data is binary: text mode fails on Python 3 and can corrupt the
    # stream on platforms that translate line endings. The writer has to use
    # binary mode as well.
    # NOTE: only unpickle files from a trusted source.
    with open(path, "rb") as f:
        return pickle.load(f)
def get_organic_dictionary() -> List[str]:
    """Read the organic-specific word list used by the spellchecker.

    The words are read from ``data/spellchecker/organic-words.txt`` below the
    current working directory, one entry per line.

    Returns:
        The lines of the file with newline characters removed, or an empty
        list (after logging an error) when the file does not exist.
    """
    words_file = os.path.join(os.getcwd(), 'data', 'spellchecker',
                              'organic-words.txt')

    if check_if_file_exists(words_file):
        with open(words_file, encoding='utf8') as handle:
            return [entry.replace('\n', '') for entry in handle]

    logger.error(
        f'Could not find source spellchecker file at path {words_file}. Please download it from the website.'
    )
    return []
    def restore_custom_marker(self):
        """ Restores a custom marker from a file.

        Does nothing if the marker file does not exist. Otherwise sets
        MARKER_SIZE, CUSTOM_MARKER_IMAGE and CUSTOM_MARKER on the instance.
        """

        path = utils.get_data_path() + "segmentationMarker"
        if not utils.check_if_file_exists(path):
            return

        # Pickle data is binary: text mode fails on Python 3 and can corrupt
        # the stream on platforms that translate line endings.
        # NOTE: only unpickle files from a trusted source.
        with open(path, "rb") as f:
            markerFile = pickle.load(f)
        self.MARKER_SIZE = markerFile["markerDimension"]

        # restore kps: the keypoints are detected again from the saved image
        # and paired with the stored "marker" entry.
        self.CUSTOM_MARKER_IMAGE = cv.imread(utils.get_data_path() + "segmentationMarkerImage.jpg", 0)
        sift = cv.SIFT()
        kp = sift.detect(self.CUSTOM_MARKER_IMAGE, None)
        self.CUSTOM_MARKER = (kp, markerFile["marker"])

        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("restored custom marker")
def get_organic_words_replacement() -> Dict:
    """Build the replacement table for hyphenated organic-specific words.

    Each line of ``data/spellchecker/organic-space-replace.txt`` (below the
    current working directory) produces one entry: the key is the line with
    every ``-`` removed, the value is the line with every ``-`` replaced by a
    space.

    Returns:
        The replacement mapping, or an empty dict (after logging an error)
        when the file does not exist.
    """
    # load organic specific entities
    path = os.path.join(os.getcwd(), 'data', 'spellchecker',
                        'organic-space-replace.txt')

    if not check_if_file_exists(path):
        logger.error(
            f'Could not find source spellchecker file at path {path}. Please download it from the website.'
        )
        # Return an empty mapping, not a list, so callers can rely on the
        # annotated dict interface even when the file is missing.
        return {}

    dictionary = {}
    with open(path, encoding='utf8') as input_file:
        for line in input_file:
            line = line.replace('\n', '')
            dictionary[line.replace('-', '')] = line.replace('-', ' ')

    return dictionary
    def __init__(self,
                 task,
                 experiment_name,
                 experiment_description,
                 default_hp,
                 overwrite_hp,
                 data_loaders,
                 dataset_infos,
                 runs=5,
                 load_model_path=None,
                 produce_baseline=False):
        """Store the experiment configuration and announce the experiment.

        Args:
            task: Stored as ``self.task``.
            experiment_name: Name used in the start-up message.
            experiment_description: Stored as ``self.experiment_description``.
            default_hp: Stored as ``self.default_hp``.
            overwrite_hp: Stored as ``self.overwrite_hp``.
            data_loaders: Must not be None and must have as many entries as
                ``dataset_infos["data_root"]``; stored as ``self.dsls``.
            dataset_infos: Mapping with a ``"data_root"`` sequence whose first
                two entries are printed as source and target.
            runs: Must be greater than zero.
            load_model_path: Optional model path; a message is printed when
                it is set, and a second one when it does not exist.
            produce_baseline: Stored as ``self.produce_baseline``.
        """
        # make sure preferences are set
        assert data_loaders is not None
        assert len(data_loaders) == len(dataset_infos["data_root"])
        assert runs > 0

        # experiment description
        self.task = task
        self.experiment_name = experiment_name
        self.experiment_description = experiment_description
        self.produce_baseline = produce_baseline
        self.runs = runs

        # hyper parameters
        self.default_hp = default_hp
        self.overwrite_hp = overwrite_hp
        self.hp = None

        # data
        self.dsls = data_loaders
        self.dataset_infos = dataset_infos
        self.data_frame = pd.DataFrame()

        # model / runtime
        self.use_cuda = torch.cuda.is_available()
        self.load_model_path = load_model_path
        self.skip_source_training = False  # skip training if source model loaded

        data_roots = dataset_infos["data_root"]
        source_root = data_roots[0]
        target_root = data_roots[1]
        print(
            f'Transfer Learning Experiment {self.experiment_name} initialized. Source: {source_root} -> Target {target_root}'
        )

        if self.load_model_path is not None:
            print('Try to restore model at ' + self.load_model_path)

            model_found = utils.check_if_file_exists(self.load_model_path)
            if not model_found:
                print(
                    'Could not find model path. Please make sure the directory exists.'
                )
Example #10
0
    def plot_dataset_stats(self, samples, labels, title, fileName):
        """Render a bar plot of samples per aspect and save it to disk.

        The image is written to ``self.img_stats_folder/fileName``, using the
        file extension as the image format. Nothing is generated when that
        file already exists. Any error while plotting is logged through
        ``self.logger`` and not re-raised.

        Args:
            samples: Values for the ``Samples`` column (bar heights).
            labels: Values for the ``Aspect`` column (x axis).
            title: Plot title; also used in the error log message.
            fileName: Name of the output file, including its extension.
        """
        path = os.path.join(self.img_stats_folder, fileName)
        # don't generate if already exists
        if check_if_file_exists(path):
            return

        fig = None
        try:
            df = pd.DataFrame({'Samples': samples, 'Aspect': labels})

            fig = plt.figure(figsize=(20, 10))
            ax = sns.barplot(data=df, color='b', x='Aspect', y='Samples')
            plt.title(title, fontsize=20)
            plt.xticks(rotation=45, ha="right")
            ax.get_yaxis().get_major_formatter().set_scientific(False)
            plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
            ax.yaxis.set_major_formatter(
                mpl.ticker.StrMethodFormatter('{x:,.0f}'))

            plt.savefig(path, format=fileName.split('.')[-1])
        except Exception:
            self.logger.exception('Could not plot ' + title)
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this, each call leaks one figure.
            if fig is not None:
                plt.close(fig)
Example #11
0
	def load_spellchecker_cache(self, language):
		"""Restore the pickled spellchecker cache for ``language``, if present.

		The cache is read from ``data/spellchecker/<language>_cache.pkl`` below
		the current working directory and stored in
		``self.spellCheckerReplaced``. Nothing happens when the file is missing.
		"""
		cache_file = os.path.join(os.getcwd(), 'data', 'spellchecker', language + '_cache.pkl')
		if not check_if_file_exists(cache_file):
			return

		with open(cache_file, 'rb') as handle:
			self.spellCheckerReplaced = pickle.load(handle)
Example #12
0
    def load_model(self, modelUuid, manualPath=None):
        """
        Load model from either model dictionary or manually.

        Keyword arguments:
        modelUuid -- uuid of the model. If None the method will try to load the model using the manualPath.
        manualPath -- if model shall be loaded manually this is the root path of the model directory

        Returns:
        model, or None if a pickled model (manual mode or majority classifier) could not be read

        Raises:
        AttributeError -- the uuid is not part of the model dictionary
        Exception -- the model file is missing and the user declined to continue,
                     the model type id could not be repaired, or the model type is not supported
        """

        # manual mode. Load model that is not part of the model dictionary.
        if modelUuid is None:
            try:
                with open(manualPath + "model", "rb") as f:
                    return pickle.load(f)
            except Exception:
                logging.exception("Could not load model manually")
                return None

        # Load model from model dictionary
        # NOTE(review): the parameters are read before the existence check
        # below; order kept as in the original -- confirm get_model_param
        # tolerates unknown uuids.
        modelParams = self.get_model_param(modelUuid)
        modelSavePath = modelParams[4]
        modelTypeId = modelParams[0]
        testdata = TestData(modelParams[3], 1, True)

        if not self.does_model_exist(modelUuid):
            raise AttributeError(
                "Model with uuid {0} was not found in model dictionary.".
                format(modelUuid))

        # A missing type id has to be handled before the string checks below:
        # calling startswith on None raises AttributeError, which made this
        # repair path unreachable for a real None.
        if modelTypeId is None or modelTypeId == "None":
            print("There was a problem loading this model {0}. The save file might be corrupted. Model Dictionary {1}".format(
                modelTypeId, modelParams))
            if utils.radio_question("[?]",
                                    "Repair model with new model type ID?",
                                    None, ["Yes", "No"], [True, False]):
                modelTypeId = utils.value_question("[?]", "Model ID", "s")
                update_model_dict(modelUuid, 0, modelTypeId)
                print("Model Id changed. Restart application and try again.")
                # raw_input only exists on Python 2; input is its Python 3
                # equivalent.
                try:
                    wait_for_key = raw_input
                except NameError:
                    wait_for_key = input
                wait_for_key("Press any key to continue.")
                import sys
                sys.exit()
            raise Exception("Could not repair model.")

        if modelTypeId == "SIFT":
            from classification.local_features.sift import SIFTClassifier
            model = SIFTClassifier(testdata, Settings.E_MODEL_TYPE)
            # Return the loaded model like the SURF and HIST branches do;
            # without the return, SIFT fell through to "not supported yet".
            return model.load(modelSavePath)

        if modelTypeId == "SURF":
            from classification.local_features.surf import SURFClassifier
            model = SURFClassifier(testdata, Settings.E_MODEL_TYPE)
            return model.load(modelSavePath)

        if modelTypeId == "HIST":
            from classification.global_features.histogram import HistogramClassifier
            model = HistogramClassifier(testdata, Settings.E_MODEL_TYPE)
            return model.load(modelSavePath)

        if modelTypeId.startswith("mCL"):
            from classification.late_fusion import MajorityClassifier
            model = MajorityClassifier(testdata)
            try:
                # Pickle data is binary; text mode fails on Python 3.
                with open(modelSavePath + "model", "rb") as f:
                    model = pickle.load(f)
            except Exception:
                logging.exception("Could not load majority classifier.")
                return None
            return model

        # NNs or CNNs
        if modelTypeId.startswith(("NN", "CNN")):
            # A star import inside a function is a SyntaxError on Python 3;
            # NeuralNetClassifier is assumed to be the only name this branch
            # needs from the module -- confirm.
            from classification.deep.neural_net import NeuralNetClassifier
            # load testdata because we need the output shape
            modelWrapper = NeuralNetClassifier(testdata, modelParams[3])

            # search for best weights
            if not utils.check_if_file_exists(modelSavePath + "model"):
                print("[!] Model file {0} was not found.".format(
                    modelSavePath + "model"))

                continue_ = utils.radio_question(
                    "[?]",
                    "It might be possible to restore the model using the weights file. Continue?",
                    None, ["Yes", "No"], [True, False])
                if not continue_:
                    delete = utils.radio_question("[?]", "Delete model?", None,
                                                  ["Yes", "No"], [True, False])
                    if delete:
                        remove_model(modelUuid)
                    raise Exception("Model file does not exist.")

            # try to restore best weights if more recent
            bestWeights = None
            if modelParams[7] == "nn_weights" and utils.check_if_file_exists(
                    modelSavePath + "best_weights"):
                bestWeights = modelSavePath + "best_weights"

            modelWrapper.load_model(modelSavePath + "model", bestWeights)
            # restore params
            modelWrapper.modelSaver.bestLoss = modelParams[5]
            modelWrapper.modelSaver.modelDescription = modelParams[2]
            modelWrapper.modelSaver.modelUuid = modelUuid
            return modelWrapper

        raise Exception(
            "Model {0} is not supported yet.".format(modelTypeId))