Ejemplo n.º 1
0
 def save_file(self, text=None):
     """Write ``self.request.content`` to disk under the to-process folder.

     Two paths are recorded on the instance:

     * ``original_path`` -- ``DOCS_TO_PROCESS_PATH`` + extension folder +
       name; the raw bytes are written here.
     * ``txt_path`` -- ``DOCS_TO_PROCESS_PATH`` + name + ``.txt``; only
       stored by this method, nothing is written to it here.

     ``text`` is accepted but not used by this method.
     """
     self.log.info(f"Saving '{self.name}' from '{self.url}'")

     self.original_path = f"{DOCS_TO_PROCESS_PATH}{self.extension}/{self.name}"
     self.txt_path = f"{DOCS_TO_PROCESS_PATH}{self.name}.txt"

     destination = self.original_path
     create_file(destination)
     with open(destination, 'wb') as raw_file:
         raw_file.write(self.request.content)
Ejemplo n.º 2
0
 def file_to_text(self):
     """Extract the text of ``self.original_path`` using ``pdftotext``.

     The converter is asked to write (with ``-layout``) into a scratch file
     under ``/tmp/processor-provisory/`` named after ``self.name``; that
     file is then read back.

     Returns:
         str: The contents of the scratch file. If ``pdftotext`` fails or
         is unavailable this is whatever ``create_file`` left in the file
         (presumably nothing -- confirm against ``create_file``).
     """
     # Local import so the block does not rely on a file-level import.
     import subprocess

     provisory_file_name = f"/tmp/processor-provisory/{self.name}.txt"
     create_file(provisory_file_name)

     # Pass an argument list instead of a shell string: names containing
     # spaces or shell metacharacters are handed to pdftotext verbatim and
     # cannot be interpreted by a shell.
     command = ["pdftotext", "-layout", self.original_path, provisory_file_name]
     try:
         # check=False: a non-zero exit status is ignored, exactly as the
         # return value of os.system() was ignored before.
         subprocess.run(command, check=False)
     except OSError:
         # pdftotext is missing or not executable. os.system() did not
         # raise in that situation either, so stay best-effort and fall
         # through to reading the scratch file.
         pass

     # pdftotext emits UTF-8 unless told otherwise with -enc, so decode
     # explicitly rather than relying on the platform's locale encoding.
     with open(provisory_file_name, 'r', encoding="utf-8") as f:
         return f.read()
Ejemplo n.º 3
0
 def __init__(self, path=None):
     """Point the root ``logging`` configuration at a log file.

     Args:
         path: Optional log file location. When truthy it is stored as
             ``self.logger_path``; otherwise the ``logger_path`` already
             available on the object is used.
     """
     if path:
         self.logger_path = path
     create_file(self.logger_path)

     # Record layout: "[LEVEL]: timestamp - message".
     record_format = '[%(levelname)s]: %(asctime)s - %(message)s'
     logging.basicConfig(
         level=logging.INFO,
         format=record_format,
         filename=self.logger_path,
     )
Ejemplo n.º 4
0
def save_text(filename, text, probability):
    """Store ``text`` in the folder of its most probable language.

    The entry of ``probability`` with the highest value is selected. If
    that value satisfies ``probability_criteria_check`` the text is written
    to ``PROBABLE_DOCS_PER_LANGUAGE_PATH/<language slug>/<filename>``.

    Returns:
        1 when the text was written, 0 otherwise (including when
        ``probability`` has no entries).
    """
    if not probability.items():
        return 0

    language_slug, score = max(probability.items(), key=lambda pair: pair[1])
    if not probability_criteria_check(score):
        return 0

    destination = f"{PROBABLE_DOCS_PER_LANGUAGE_PATH}{language_slug}/{filename}"
    create_file(destination)
    with open(destination, 'w') as out:
        log.info(f"Saving text in '{language_slug}' probable docs folder as '{filename}'")
        out.write(text)
    return 1
Ejemplo n.º 5
0
def process_language_bootstrap_files(language):
    """Feed a language's bootstrap ``*.txt`` files into its assets.

    Every ``*.txt`` file directly inside ``BOOTSTRAP_PATH/<slug>/`` is read,
    handed to ``add_to_language_assets`` and then moved into a
    ``processed/`` subfolder next to it. A warning is logged when no file
    matches.

    Args:
        language: Mapping that provides the language identifier under the
            ``'slug'`` key.
    """
    base_path = f"{BOOTSTRAP_PATH}{language['slug']}/"
    files = glob.glob(os.path.join(base_path, '*.txt'))
    log.info(f"Boostraping '{language['slug']}' language with {files}")

    if not files:
        log.warning(
            f"There is no files to add to '{language['slug']}' language assets"
        )
        return

    for filename in files:
        with open(filename, 'r') as f:
            log.info(
                f"Adding '{os.path.basename(filename)}' words to '{language['slug']}' language assets"
            )
            add_to_language_assets(language['slug'], f.read())

        folder_path, name = os.path.split(filename)
        new_path = f"{folder_path}/processed/{name}"
        create_file(new_path)
        # create_file presumably leaves a placeholder at new_path (confirm
        # against its definition). os.rename() raises on Windows when the
        # destination already exists; os.replace() overwrites it on every
        # platform and behaves like os.rename() on POSIX.
        os.replace(filename, new_path)
Ejemplo n.º 6
0
 def save_text_file(self, text):
     """Write ``text`` to ``self.txt_path``, prefixed with the source URL.

     The file starts with ``self.url``, followed by four newlines and then
     ``text``.
     """
     self.log.info(f"Converting '{self.name}' to '{self.name}.txt'")
     create_file(self.txt_path)
     with open(self.txt_path, 'w') as txt_file:
         contents = f"{self.url}\n\n\n\n{text}"
         txt_file.write(contents)
 def _create_language_assets(self, path):
     """Create the asset folder for a language and its two starting files.

     ``path`` is used as a plain string prefix for the file names, so it
     is expected to already end with a path separator.
     """
     create_folder(path)

     # Dictionary file, created with '{}' as its second argument.
     dictionary_file = f"{path}dictionary.json"
     create_file(dictionary_file, '{}')

     # Word list file, created without a second argument.
     words_file = f"{path}words_list"
     create_file(words_file)
Ejemplo n.º 8
0
def mark_file_as_processed(filename):
    """Move ``filename`` into the processed-documents folder.

    The file keeps its base name and ends up at
    ``DOCS_PROCESSED_PATH/<base name>``.

    Args:
        filename: Path of the file to move.
    """
    log.info(f"Marking '{os.path.basename(filename)}' as processed")
    new_path = f"{DOCS_PROCESSED_PATH}{os.path.basename(filename)}"
    create_file(new_path)
    # create_file presumably leaves a placeholder at new_path (confirm
    # against its definition). os.rename() raises on Windows when the
    # destination already exists; os.replace() overwrites it on every
    # platform and behaves like os.rename() on POSIX.
    os.replace(filename, new_path)