def ckpt_up_to_date(local_ckpt_prefix, bucket, folder):
    """Return True if the local checkpoint prefix matches the most recent one in the s3 folder."""
    fm = FileManager(bucket)
    ckpt_names = fm.get_folder_list(folder)
    most_recent_prefix = most_recent_ckpt_from_list(ckpt_names)
    return local_ckpt_prefix == most_recent_prefix
def main_menu(self):
    print("Main menu:")
    while True:
        print('Please select one of the following options:\n'
              '(R -> register, E -> exit, P -> print, F -> find)')
        user_input = input()
        if user_input in ('r', 'R'):
            std = self.register_student()
            self._students.append(std)
            print("Registering a new student...")
            time.sleep(1)
            FileManager.write_file(r'files\students.txt', std)
            print("Done.")
        elif user_input in ('f', 'F'):
            self.find_student()
        elif user_input in ('p', 'P'):
            printer = Printer(self._students)
            # self.print_all_students()
            printer.show_printer_menu()
            printer.print_sorted_list(printer.get_user_input())
        else:
            print("Exiting program...")
            time.sleep(1)
            exit()
def __init__(self):
    QObject.__init__(self)
    ParallelWorker.__init__(self)
    settings = Settings()
    FileManager.__init__(self, sites=settings.sites_path)
    self.options["multiprocess"] = False
    self.to_save = None
def read_cleaned_docs(documents_path, category):
    """
    Reads the documents belonging to the given category and cleans their text.

    :param documents_path: list of paths to the folders holding the dataset documents and the
        documents uploaded by the user.
    :param category: name of the category of interest. The name of the folder containing the
        category's files must match the name of the corresponding class in the ontology.
    :return: dictionary of the documents read. Specifically:
        - the key has the form '(category)_doc_(document_name)';
        - the value is the cleaned text.
    """
    file_manager = FileManager()
    # Collection of the documents found in the inspected directory
    docs = {}
    for path in documents_path:
        # Path of the folder holding the documents for the category under examination
        cat_dir_path = path + '/' + category
        if os.path.exists(cat_dir_path):
            # List of the files contained in the inspected directory
            files = os.listdir(cat_dir_path)
            # Read each file and clean its text
            for file_name in files:
                text = file_manager.read_file(cat_dir_path + '/' + file_name)
                docs[category + '_doc_' + file_name.split('.')[0]] = clean_text(text)
        else:
            os.makedirs(cat_dir_path)
    return docs
def __init__(self, file):
    fm = FileManager(file)
    fm.read_param()
    self.size = fm.size
    self.gamma = fm.gamma
    self.noise = fm.noise
    self.table = fm.table
    self.count = 0
def __init__(self, dataset_path, user_path, cat_docs, keywords_extractor, text_classifier, entity_recognizer):
    self._dataset_path = dataset_path
    self._user_path = user_path
    self._cat_docs = cat_docs
    self._ke = keywords_extractor
    self._tc = text_classifier
    self._er = entity_recognizer
    self._file_manager = FileManager()
def get_matrix_from_annotations(s3_bucket_name, annotation_path):
    if not annotation_path.endswith('.json'):
        raise ValueError('Cannot read from non json annotation file')
    fm = FileManager(s3_bucket_name)
    im_data = fm.read_image_dict('', annotation_path)
    M = im_data.get('warp', {}).get('M', {})
    if not M:
        return None
    M = np.array(M, dtype=np.float32).reshape(9)
    return M
def test():
    hmm = import_table()
    print(hmm.stage_table['AWARE'].next_state)
    print(hmm.emission_table)
    fm = FileManager(
        "D:\\PyProject\\AI_HMM\\AI_HMM\\examples\\hmm_customer_1586733276720.txt"
    )
    emission_list = fm.read_emissions()
    print(emission_list)
    print(hmm.assume('ZERO', emission_list))
def _get_file_content(self, filepath):
    """
    Reads the content of a text document.

    :param filepath: relative path of the file to read.
    :return: textual content of the file.
    """
    file_manager = FileManager()
    # Project root path
    ROOT_DIR = os.path.abspath(os.path.dirname(__file__))
    # Read the file and return its content
    return file_manager.read_file(os.path.join(ROOT_DIR, filepath))
def __init__(self, ontology_builder, keyword_extractor, text_classifier, entity_recognizer, dataset_path, user_path):
    self._ob = ontology_builder
    self._ke = keyword_extractor
    self._er = entity_recognizer
    self._dataset_path = dataset_path
    self._user_path = user_path
    # Dictionary holding the list of documents associated with each category. Specifically:
    # - key: the category name;
    # - value: a nested dictionary whose (key, value) pairs are (document_name, document_text).
    self._cat_docs = {}
    # Handles file read and write operations.
    self._file_manager = FileManager()
    # Processes the documents supplied to the system by the user.
    self._tp = TextProcessor(dataset_path, user_path, {}, keyword_extractor, text_classifier, entity_recognizer)
    # Initialize the system.
    self._init_system()
    # Retrieve the ontology.
    self._onto = self._ob.get_onto()
    # Create a QueryBuilder instance to run SPARQL queries.
    self._query_builder = QueryBuilder()
def test_init(self):
    _ = FileManager(file_src_dir=self.FILE_SRC_DIR,
                    symbolic_link_dst=self.SYMBOLIC_LINK_DST)
    assert os.path.exists(f"{self.FILE_SRC_DIR}/published")
    assert os.path.exists(f"{self.SYMBOLIC_LINK_DST}/unpublished")
    shutil.rmtree(self.FILE_SRC_DIR)
    os.remove(self.SYMBOLIC_LINK_DST)
def _load_json_files(self):
    arrayjson_kpis = []
    for kpi in Config.JOBS_NAMES:
        for date in self.daterange:
            data = FileManager.read_from_json_file(
                Config.WORKDIRECTORY_FOR_KPIS.format(date=date), kpi)
            if data:
                arrayjson_kpis.append(data)
    return arrayjson_kpis
def download_ckpt_to_dir(bucket, folder, dest_dir):
    '''
    Download the most recent ckpt files (index, meta, data) from an s3 bucket and directory.

    dest_dir: local folder to put files into
    returns: path/to/ckpt_prefix
    '''
    fm = FileManager(bucket)
    # need .data and .index files (don't necessarily need .meta, but will download it)
    ckpt_names = fm.get_folder_list(folder)
    most_recent_ckpt_prefix = most_recent_ckpt_from_list(ckpt_names)
    print('Downloading ckpts from s3: {}'.format(most_recent_ckpt_prefix))
    ckpt_file_names = [x for x in ckpt_names if most_recent_ckpt_prefix in x]
    path_and_prefix = dest_dir + '/' + most_recent_ckpt_prefix
    for key in ckpt_file_names:
        dest_filepath = dest_dir + '/' + key
        fm.download_file(folder, key, dest_filepath)
    return path_and_prefix
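# Hedged usage sketch (not from the original sources): one way ckpt_up_to_date and
# download_ckpt_to_dir above might be combined. The bucket, folder, and local prefix
# below are made-up placeholders, not values from the project.
if __name__ == '__main__':
    bucket = 'my-training-bucket'      # hypothetical s3 bucket name
    folder = 'experiments/run_01'      # hypothetical checkpoint folder in the bucket
    local_dir = '/tmp/ckpts'           # local destination directory
    local_prefix = 'model.ckpt-1000'   # hypothetical prefix of the checkpoint already on disk

    # Only download when the local checkpoint is stale.
    if not ckpt_up_to_date(local_prefix, bucket, folder):
        ckpt_path = download_ckpt_to_dir(bucket, folder, local_dir)
        print('Restored checkpoint prefix:', ckpt_path)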
def run(self, collections):
    Log.Instance().appendFinalReport(
        "\nStarting WRITING stage...\n===================")
    for collection in collections:
        Log.Instance().append("Writing " + collection['etl_meta']['label'] +
                              " for " + collection['etl_meta']['timestamp'] + "...")
        if collection['etl_meta']['is_kpi']:
            filepath = Config.WORKDIRECTORY_FOR_KPIS
        else:
            filepath = Config.WORKDIRECTORY_FOR_TEMPS
        filepath = filepath.format(
            date=collection['etl_meta']['timestamp'][0:10])
        FileManager.create_if_dont_exist(filepath)
        FileManager.write_json_to_file(filepath,
                                       collection['etl_meta']['label'],
                                       collection)
    Log.Instance().appendFinalReport(
        "===================\nWRITING stage ended.")
def create_app():
    """ Initialize Flask and set up the database """
    project_dir = os.path.dirname(os.path.abspath(__file__))
    app = CustomFlask(__name__)
    app.config.from_mapping(
        SECRET_KEY=os.urandom(16),
        CACHED_TIME=time.time(),
        THEME_DIR="./data/theme.json",
        REAL_MARKDOWN_DIR=os.path.join(project_dir, "./data/topaz_docs"),
        MARKDOWN_DIR="./data/docs",
        FILE_MANAGER=FileManager(
            file_src_dir=os.path.join(project_dir, "./data/topaz_docs") + "/",
            symbolic_link_dst="./data/topaz_docs"))
    CORS(app, supports_credentials=True)

    # Init database
    import db
    db.init_db()

    # Init GitHub cache
    from utils.github import update_public_repos
    db_conn = db.get_db()
    update_public_repos(db_conn)

    # Init Medium cache
    db_conn = db.get_db()
    with open(app.config["THEME_DIR"], "r") as f:
        data = json.load(f)
    medium_url = data["nav_bar_footer"]["medium"]["link"]
    from utils.medium import update_articles
    update_articles(db_conn, medium_url)

    # Register blueprints
    import home
    app.register_blueprint(home.bp)
    import auth
    app.register_blueprint(auth.bp)

    # Initialize login manager
    login_manager = LoginManager()
    login_manager.init_app(app)

    @login_manager.user_loader
    def user_loader(username):
        db_conn = db.get_db()
        return get_user(db_conn, username)

    return app
def extract_training_data(s3_bucket_name, top_dir):
    fm = FileManager(s3_bucket_name)
    annotation_dir = os.path.join(top_dir, 'annotations')
    frames_dir = os.path.join(top_dir, 'frames')
    annotations_names = fm.get_folder_list(annotation_dir, extension_filter='json')
    frame_names = [x for x in fm.get_folder_list(frames_dir) if len(x.split('.')) == 2]
    frame_exts = set([x.split('.')[1] for x in frame_names])
    frame_names = set(frame_names)
    data = []
    for filename in annotations_names:
        # if not a json file, skip
        if not filename.split('.')[1] == 'json':
            continue
        # strange behavior with os.path.join
        # depending on whether or not looking in s3 bucket
        if s3_bucket_name is not None:
            annotation_filename = annotation_dir + filename
        else:
            annotation_filename = os.path.join(annotation_dir, filename)
        M = get_matrix_from_annotations(s3_bucket_name, annotation_filename)
        if M is not None:
            filename_noext = filename.split('.')[0]
            for ext in frame_exts:
                frame_name = filename_noext + '.' + ext
                if frame_name in frame_names:
                    # same strange behavior
                    if s3_bucket_name is not None:
                        frame_path_from_bucket = top_dir + '/frames' + frame_name
                    else:
                        frame_path_from_bucket = os.path.join(top_dir, 'frames', frame_name)
                    data.append((frame_path_from_bucket, M))
    return data
def api_proxy(action: str, kwargs: dict) -> str:
    serving_storage_dir = os.getenv('SERVING_STORAGE_DIR', tempfile.gettempdir())
    logging.info(f'serving_storage_dir is {serving_storage_dir}')
    model_config_dir = os.path.join(serving_storage_dir, 'configs')
    model_storage_dir = os.path.join(serving_storage_dir, 'models')
    model_config_manager = ModelConfigManager(model_config_dir)
    file_manager = FileManager(model_storage_dir)
    try:
        if action == 'list':
            configs = model_config_manager.list()
            return json.dumps(configs)
        elif action == 'register':
            model_config_manager.register(**kwargs)
        elif action == 'update':
            model_base_path = file_manager.copy_model_to_serving(**kwargs)
            kwargs['model_base_path'] = model_base_path
            model_config_manager.update(**kwargs)
        elif action == 'delete':
            file_manager.delete_model_from_serving(**kwargs)
            model_config_manager.delete(**kwargs)
        return 'Done.'
    except Exception as err:
        return f'Failed to {action}, because {err}, {traceback.format_exc()}'
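# Hedged usage sketch (an assumption, not from the original source): driving api_proxy
# above with its four actions. The exact kwargs accepted by ModelConfigManager and
# FileManager are not shown in the snippet, so the dictionaries below are placeholders.
if __name__ == '__main__':
    print(api_proxy('list', {}))
    print(api_proxy('register', {'model_name': 'demo_model'}))                 # hypothetical kwargs
    print(api_proxy('update', {'model_name': 'demo_model',
                               'model_path': '/tmp/demo_model/1'}))            # hypothetical kwargs
    print(api_proxy('delete', {'model_name': 'demo_model'}))                   # hypothetical kwargs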
def main():
    print("Testing program: Student registration\n")
    students = FileManager.read_file(r'files\students.txt')
    menu = Menu(students)
    menu.main_menu()
if __name__ == '__main__':
    server = 'https://www.80s.tw/'
    url = 'https://www.80s.tw/movie/list'
    headers = {
        'Referer': 'https://www.80s.tw/movie/list',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Mobile Safari/537.36'
    }
    r = requests.get(url=url, headers=headers)
    bf = BeautifulSoup(r.text, 'lxml')
    clearfix = bf.find(class_='me1 clearfix')
    clearfix_bf = BeautifulSoup(str(clearfix), 'lxml')
    div_a = clearfix_bf.find_all('a')
    fm = FileManager()
    for a in div_a:
        a_bf = BeautifulSoup(str(a), 'lxml')
        href = a_bf.a.get('href')
        url_info = server + href
        print('Detail link:', server + href)  # detail page link for each movie
        info = requests.get(url=url_info, headers=headers)
        info_bf = BeautifulSoup(info.text, 'lxml')
        minfo = info_bf.find('div', class_='clearfix', id='minfo')
        img_info = info_bf.find('div', class_='img')
        name = ''
        src_url = ''
        if img_info is not None:
from utils.display import Display
from utils.file_manager import FileManager
from utils.warp_tools import *
from utils.rink_specs import HockeyRink
import random

import cv2
import numpy as np

fm = FileManager('bsivisiondata')
d = Display()
annotations = fm.get_folder_list('PHI-PIT_6m-8m/annotations', extension_filter='json')
# random.shuffle(annotations)
for f in annotations:
    print(f)
    im_dict = fm.read_image_dict('PHI-PIT_6m-8m/annotations', f)
    if 'warp' not in im_dict:
        continue
    imname = f.split('.')[0] + '.png'
    im = fm.read_image_file('PHI-PIT_6m-8m/frames', imname)
    im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    H = np.array(im_dict['warp']['M'])
    hr = HockeyRink()
    scaled_H = scale_homography(H, 600, 300)
    H1280 = scale_homography(H, 1280, 720)
    # NOTE: images needed to be resized before calling warp!
class TextProcessor:
    """
    Processes a text supplied to the system by the user.
    """

    def __init__(self, dataset_path, user_path, cat_docs, keywords_extractor, text_classifier, entity_recognizer):
        self._dataset_path = dataset_path
        self._user_path = user_path
        self._cat_docs = cat_docs
        self._ke = keywords_extractor
        self._tc = text_classifier
        self._er = entity_recognizer
        self._file_manager = FileManager()

    def set_cat_docs(self, cat_docs):
        self._cat_docs = cat_docs

    def process_text(self, text):
        """
        Takes a text as input and uses the Meaning Cloud API to extract the category it belongs to
        (with the corresponding score) and the Dandelion API to extract the entities it contains.
        The related keywords are also extracted using TF-IDF.

        :param text: text to process.
        :return:
            - cat: dictionary containing the name of the category the document was classified into and the corresponding score;
            - fname: name of the file the input text was saved to;
            - doc_keys: keywords of the new document;
            - doc_ents: entities contained in the new document.
        """
        # Use the classifier (Meaning Cloud API) to retrieve the category the document belongs to,
        # together with the corresponding membership score.
        cat = self._tc.get_category(text)
        cat_name = cat['name']
        # Save the document in the corresponding user folder.
        fname = self._save_doc(cat_name, text)
        # Clean the text
        cleaned_text = clean_text(text)
        # Extract the keywords associated with the text
        doc_keys = self._extract_keywords(cat_name, fname, cleaned_text)
        # Extract the entities associated with the text
        doc_ents = self._er.get_entities(cleaned_text)
        return cat, fname, doc_keys, doc_ents

    def _save_doc(self, cat_name, text):
        """
        Saves the document in the user folder for the category the document was classified into.

        :param cat_name: category the document belongs to. The name of the folder the document is saved in
            must match the name of the category returned by the classifier.
        :param text: text to save in the document.
        :return: name of the file the text was saved to.
        """
        user_cat_dir_path = self._user_path + '/' + cat_name
        # If the folder holding the user-uploaded files for this category does not exist, create it.
        if not os.path.exists(user_cat_dir_path):
            os.makedirs(user_cat_dir_path)
        # Build the name to assign to the document in which the input text will be saved:
        # count the files already in the folder for the document's category...
        num_docs = len(os.listdir(user_cat_dir_path))
        # ...and add 1 to obtain the name to assign to the new document.
        fname = 'user_' + str(num_docs + 1) + '.txt'
        self._file_manager.write_file(user_cat_dir_path + '/' + fname, text)
        return fname

    def _extract_keywords(self, cat_name, fname, cleaned_text):
        """
        Extracts the keywords related to the text passed as input.

        :param cat_name: name of the category the classified text belongs to.
        :param fname: name of the file the original text was saved to.
        :param cleaned_text: cleaned text.
        :return: keywords related to the document under examination.
        """
        # If the document collection for this category is not available yet, read it and clean it first.
        if cat_name not in self._cat_docs:
            print("reading documents...", end=" ")
            self._cat_docs[cat_name] = read_cleaned_docs(
                [self._dataset_path, self._user_path], cat_name)
            print("done.")
        # Retrieve all the documents for the category the new document belongs to
        docs = self._cat_docs[cat_name]
        # Extract the keywords for the document under examination
        doc_instance_name = cat_name + '_doc_' + fname.split('.')[0]
        docs[doc_instance_name] = cleaned_text
        doc_keys = self._ke.extract(docs)[doc_instance_name]
        return doc_keys
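# Hedged usage sketch (not part of the original project): exercising TextProcessor.process_text
# with minimal stub collaborators so the call sequence above can be followed end to end.
# The real project wires in Meaning Cloud, Dandelion, and TF-IDF components instead, and this
# sketch still relies on the project's FileManager, clean_text, and read_cleaned_docs helpers.
class _StubClassifier:
    def get_category(self, text):
        # pretend Meaning Cloud classified the text
        return {'name': 'Sport', 'score': 0.91}

class _StubExtractor:
    def extract(self, docs):
        # one dummy keyword list per document
        return {name: ['keyword'] for name in docs}

class _StubRecognizer:
    def get_entities(self, text):
        return ['Entity']

if __name__ == '__main__':
    tp = TextProcessor('data/dataset', 'data/user', {},
                       _StubExtractor(), _StubClassifier(), _StubRecognizer())
    print(tp.process_text("Some user-supplied text..."))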
from utils.file_manager import FileManager

SERVER_PORT = "28888"
MAX_SIM_THREADS = 2
DEFAULT_QUEUE_CHECK_INTERVAL = 2
DEFAULT_THREAD_CHECK_INTERVAL = .2
DEFAULT_REGION = "us"

CONTROLLER_MODULES = [
    'rest_controllers',
    'socket_controllers',
]

# Logger config
fm = FileManager()
logger_settings = {
    "DEFAULT_DEBUG": fm.stdout,
    "DEFAULT_LOG": fm.stdout,
    "DEFAULT_ERR": fm.stderr,
    "DEFAULT_WARN": fm.stderr,
    "DEFAULT_NULL": fm.null,
    "INCLUDE": {
        "debug": False,
        "log": True,
        "err": True,
        "warn": True,
        "null": True,
    },
    "TIMESTAMP": True,
}
def fm(self):
    yield FileManager(file_src_dir=self.FILE_SRC_DIR,
                      symbolic_link_dst=self.SYMBOLIC_LINK_DST)
    shutil.rmtree(self.FILE_SRC_DIR)
    os.remove(self.SYMBOLIC_LINK_DST)
from utils.file_manager import FileManager
from utils.engine import Engine

B, L, D, libraries, books_scores, picked_books = FileManager.read_file('a_example.txt')
engine = Engine(libraries=libraries, D=D, books_scores=books_scores, picked_books=picked_books)
output: list = engine.start()
FileManager.write_file('a.txt', output)