def load_all(update, queue):
    series_names = update.keys()
    for n in series_names:
        show = update[n]
        for s in show['seasons']:
            for e in s['episodes']:
                if e['delete']:
                    e = File(old_location=e['location'],
                             s_nr=e['s_nr'],
                             e_nr=e['e_nr'],
                             series_name=n,
                             title=e['title'],
                             title2=e['title2'],
                             title3=e['title3'],
                             episode_option=e['episode_option'],
                             name_needed=e[NAME_NEEDED],
                             delete=True,
                             anime=SHOWS[n].anime)
                    queue.append(e)
                elif e['save']:
                    e = File(old_location=e['location'],
                             s_nr=e['s_nr'],
                             e_nr=e['e_nr'],
                             s_nr_old=e['s_nr_old'],
                             e_nr_old=e['e_nr_old'],
                             series_name=n,
                             title=e['title'],
                             title2=e['title2'],
                             title3=e['title3'],
                             episode_option=e['episode_option'],
                             name_needed=e[NAME_NEEDED],
                             anime=SHOWS[n].anime)
                    syncer.queue_episode(e)

def fetch(url, thread):
    if gl.done_urls.has_key(to_md5_str(url)):
        return
    http_or_https, domain, relative_url = Spider.get_url_tuple(url)
    new_url_id = gl.db.collection(gl.DB_COUNT).find_one({"name": gl.DB_URLS_NAME})["count"] + 1
    new_document_path = gl.DOCUMENT_ROOT_PATH + domain + "\\"
    new_document_name = str(new_url_id) + ".html"
    url_dict = {
        "id": new_url_id,
        "url": HttpRequest.quote(url),
        "http_or_https": http_or_https,
        "domain": domain,
        "relative_url": relative_url,
        "document_path": new_document_path,
        "document_name": new_document_name,
        "create_time": get_time_million(),
    }
    gl.db.collection(gl.DB_URLS_NAME).insert(url_dict)
    gl.db.collection(gl.DB_COUNT).update({"name": gl.DB_URLS_NAME}, {"$inc": {"count": 1}})
    page = HttpRequest.get_url_content(url)
    gl.done_urls[to_md5_str(url)] = 1
    File.makedir(new_document_path)
    File.write_text_to_file(new_document_path + new_document_name, page)
    new_urls = Spider.get_page_hrefs(page)
    for i in range(len(new_urls)):
        new_url = {
            "id": gl.db.collection(gl.DB_COUNT).find_one({"name": gl.DB_TODO_URLS_NAME})["count"] + 1,
            "url": new_urls[i],
        }
        # query on the "url" field (the original used the bare variable `url` as the key)
        if gl.db.collection(gl.DB_TODO_URLS_NAME).find({"url": new_urls[i]}).count() <= 0:
            gl.db.collection(gl.DB_TODO_URLS_NAME).insert(new_url)
            gl.db.collection(gl.DB_COUNT).update({"name": gl.DB_TODO_URLS_NAME}, {"$inc": {"count": 1}})
    thread.task = None
    thread.working = False

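# --- Illustrative sketch (not part of the crawler above) ---
# fetch() hands out incremental ids by reading a "named counter" document and then
# bumping it with $inc. The snippet below shows the same pattern with plain pymongo;
# the client, database and collection names are hypothetical stand-ins for the
# project's own gl.db wrapper.
from pymongo import MongoClient

db = MongoClient()['spider_demo']                   # hypothetical database
counter = db['count'].find_one({'name': 'urls'})    # read the current counter value
new_id = counter['count'] + 1                       # id for the next document
db['count'].update_one({'name': 'urls'}, {'$inc': {'count': 1}})  # bump the counter
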
def test_compiler(self):
    comp_true = TEST_DATA['compile_truth']
    for t1 in ['Brian', '']:
        for t2 in ['Roger', '', 'Brian']:
            for t3 in ['John', '', 'Brian', 'Roger']:
                for eo in [SINGLE, DOUBLE, TRIPLE]:
                    for a in [True, False]:
                        for se in [[2, 1], [20, 10], [2, 100]]:
                            if se[1] == 100 and not a:
                                continue
                            f = File(s_nr=se[0],
                                     e_nr=se[1],
                                     series_name='Freddie',
                                     title=t1,
                                     title2=t2,
                                     title3=t3,
                                     episode_option=eo,
                                     anime=a)
                            f.extension = 'mkv'
                            comp = Episode.compile_file_name(None, f)
                            s = ''.join([t1[0] if t1 else '_',
                                         t2[0] if t2 else '_',
                                         t3[0] if t3 else '_',
                                         'A' if a else '_',
                                         eo[0],
                                         str(len(str(se[0])) + 3),
                                         str(len(str(se[1])))])
                            # print(s)
                            self.assertEqual(comp, comp_true[s])

def test_validate_number_in_interval_double(self):
    prod, cons = app.run(Config.K_MONITOR_TEST_TOPIC,
                         Config.PS_DATABASE_NAME,
                         Config.PS_TEST_WEBSITE_TABLE_NAME,
                         "tests/t_monitor_heavy_test.yml")
    interval = File.read_time_interval("tests/t_monitor_heavy_test.yml")
    time.sleep(interval * 2)
    app.stop_monitor(prod, cons)
    admin_client = KafkaAdminClient(
        bootstrap_servers=[Config.K_HOST + ':' + Config.K_PORT],
        security_protocol=Config.K_SECURITY_PROTOCOL,
        ssl_cafile=Config.K_SSL_CAT_FILE,
        ssl_certfile=Config.K_SSL_CERT_FILE,
        ssl_keyfile=Config.K_SSL_KEY_FILE)
    admin_client.delete_topics([Config.K_MONITOR_TEST_TOPIC])
    monitors = File.read_monitors("tests/t_monitor_heavy_test.yml")
    # over two monitoring cycles the producer should send twice as many messages as there are monitored URLs
    self.assertEqual(prod.get_message_count(), len(monitors) * 2)

def check(self):
    if not SYSTEM["monitor"]["SOURCE"]:
        message = "Source monitor is disabled, check your config!"
        self.logger.error(message)
        print message
        time.sleep(60)
        exit(0)
    ancestor_thread_list = []
    file = File()
    profile_list = file.get_check_list()
    profile_list = profile_list[0:len(profile_list) - 1]
    if profile_list:
        for line in profile_list.split('\n'):
            self.logger.debug("Last Check : %s" % (line))
            profile = json.loads(line)
            while threading.activeCount() > profile['thread']:
                time.sleep(1)
            t = threading.Thread(target=self.check_source,
                                 args=(profile['source'], profile['status'],
                                       profile['pa_id'], profile['agent'],
                                       profile['name'], profile['type'],))
            t.start()
            # time.sleep(30)
            ancestor_thread_list.append(t)
    for ancestor_thread in ancestor_thread_list:
        ancestor_thread.join()
    time.sleep(10)

def prep_file(name, root):
    extension = name.split('.')[-1].lower()
    if extension in EXTENSIONS:
        if 'sample' in name.lower():
            return None
        return File(old_location=os.path.join(root, name), subs=False)
    if extension in SUBS:
        return File(old_location=os.path.join(root, name), subs=True)

def block_code(self, block_position):
    """
    Get code block
    :param block_position: 0:up 1:down 2:line 3:in-function
    :return:
    """
    if block_position == 2:
        if self.line is None or self.line == 0:
            logger.error("[AST] Line exception: {0}".format(self.line))
            return False
        line_rule = '{0}p'.format(self.line)
        code = File(self.file_path).lines(line_rule)
        if code is not False:
            code = code.strip()
        return code
    else:
        block_start = 1
        block_end = 0
        functions = self.functions()
        if functions:
            for function_name, function_value in functions.items():
                if int(function_value['start']) < int(self.line) < int(function_value['end']):
                    in_this_function = '<---- {0}'.format(self.line)
                    if block_position == 0:
                        block_start = function_value['start']
                        block_end = int(self.line) - 1
                    elif block_position == 1:
                        block_start = int(self.line)
                        block_end = int(function_value['end']) - 1
                    elif block_position == 3:
                        block_start = function_value['start']
                        block_end = function_value['end']
                    logger.debug("[AST] [FUNCTION] {0} ({1} - {2}) {3}".format(
                        function_name, function_value['start'], function_value['end'], in_this_function))
        else:
            if block_position == 0:
                block_start = 1
                block_end = int(self.line) - 1
            elif block_position == 1:
                block_start = int(self.line) + 1
                block_end = sum(1 for l in open(self.file_path))
            elif block_position == 3:
                block_start = 1
                block_end = sum(1 for l in open(self.file_path))
            logger.debug("[AST] No enclosing `function` found, will split the whole file")
        # get param block code
        line_rule = "{0},{1}p".format(block_start, block_end)
        code = File(self.file_path).lines(line_rule)
        logger.debug('[AST] [BLOCK-CODE-LINES] {0} - {1}p'.format(block_start, block_end))
        return code

def queue_errors(errors, queue):
    for error in errors:
        series_name = error[SERIES_NAME]
        if 'exception' in error and error['exception']:
            e_id = f'{series_name} {error["s_nr"]:02d}x{error["e_nr"]:0{3 if error["anime"] else 2}d}'
            if error['exception'] not in ['part', 'double', 'lower_general', 'title_match']:
                if e_id not in EXCEPTIONS[error['exception']]:
                    EXCEPTIONS[error['exception']][e_id] = []
                EXCEPTIONS[error['exception']][e_id].append(error['word'])
                EXCEPTIONS[error['exception']][e_id] = sorted(
                    list(set(EXCEPTIONS[error['exception']][e_id])))
            elif error['exception'] == 'double':
                if series_name not in EXCEPTIONS[error['exception']]:
                    EXCEPTIONS[error['exception']][series_name] = []
                EXCEPTIONS[error['exception']][series_name].append(error['title'])
                EXCEPTIONS[error['exception']][series_name] = sorted(
                    list(set(EXCEPTIONS[error['exception']][series_name])))
            elif error['exception'] == 'lower_general':
                EXCEPTIONS[error['exception']].append(error['word'])
                EXCEPTIONS[error['exception']] = sorted(
                    list(set(EXCEPTIONS[error['exception']])))
            else:
                EXCEPTIONS[error['exception']].append(e_id)
                EXCEPTIONS[error['exception']] = sorted(
                    list(set(EXCEPTIONS[error['exception']])))
            continue
        if error['delete']:
            err = File(old_location=error['old_location'],
                       s_nr=error['s_nr'],
                       e_nr=error['e_nr'],
                       series_name=series_name,
                       title=error['title'],
                       episode_option=error['episode_option'],
                       name_needed=error[NAME_NEEDED],
                       delete=True,
                       anime=SHOWS[series_name].anime)
            queue.append(err)
        elif error['save']:
            err = File(old_location=error['old_location'],
                       s_nr=error['s_nr'],
                       e_nr=error['e_nr'],
                       s_nr_old=error['s_nr_old'],
                       e_nr_old=error['e_nr_old'],
                       series_name=series_name,
                       title=error['title'],
                       episode_option=error['episode_option'],
                       name_needed=error[NAME_NEEDED],
                       anime=SHOWS[series_name].anime)
            syncer.queue_episode(err)

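# --- Illustrative sketch (not part of queue_errors above) ---
# The e_id built above zero-pads the episode number to three digits for anime and to
# two digits otherwise. The example values here are hypothetical.
series_name, s_nr, e_nr, anime = 'Show', 2, 5, True
e_id = f'{series_name} {s_nr:02d}x{e_nr:0{3 if anime else 2}d}'
# anime=True  -> 'Show 02x005'
# anime=False -> 'Show 02x05'
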
def send_structure(self, file_id):
    if not self.connected:
        return False
    try:
        file = self.files[file_id] if self.files[file_id] else File(file_id, '')
    except IndexError:
        file = File(file_id, '')
    hash_structure = self.merkle_tree.get_structure_with_file(file)
    structure_json = node_to_json(hash_structure)
    self.send_bytes_secure(bytes(structure_json, encoding='utf-8'))
    return True

def run(topic: str, db: str, table: str, filepath=None):
    if not filepath:
        filepath = Config.MONITERFILE
    interval = File.read_time_interval(filepath)
    monitors = File.read_monitors(filepath)
    producer = Producer(topic, interval)
    consumer = Consumer(topic, db, table)
    start_mointoring(monitors, producer, consumer)
    return producer, consumer

def loop():
    date = "{}-{}-{}".format(ds.Day(), ds.Month(), ds.Year())
    time = "{}-{}-{}".format(ds.Hour(), ds.Minute(), ds.Second())
    file_name = "{}/data-{}.txt".format(MOUNT_DIR, date)
    io = File(path=file_name)
    # read data from sensor dht
    # and save to variable data.
    data = sensor.read_dht(time)
    # Write data to file
    io.save(data)
    # Delay for 5 Second
    sleep(5)

def __init__(self):
    self.parser = reqparse.RequestParser()
    self.parser.add_argument('text', required=False, type=str)
    self.parser.add_argument('ID', required=False, type=int)
    logging.basicConfig(level=logging.INFO)
    self.logger = logging.getLogger("Help")
    self.file = File()

def prep(data):
    base = create_location(data[SERIES_NAME], data['anime'])
    if not base:
        return False
    show = Series(series_name=data[SERIES_NAME],
                  location=base,
                  tvdb_id=data[TVDB_ID],
                  premiere=data[PREMIERE],
                  final=data[FINAL],
                  status=data[STATUS],
                  name_needed=data[NAME_NEEDED])
    REPORT['info'].append('Series Name: ' + show.series_name)
    REPORT['info'].append('Status: ' + show.status)
    for f in data['files']:
        if not f['s_nr'] or not f['e_nr']:
            continue
        f = File(old_location=os.path.join(FILE_DIR, f['location']),
                 series_name=show.series_name,
                 s_nr=f['s_nr'],
                 e_nr=f['e_nr'],
                 title=f['title'],
                 title2=f['title2'],
                 title3=f['title3'],
                 episode_option=f['episode_option'],
                 subs=f['sub'],
                 anime=show.anime)
        folder = make_season_folder(f.s_nr, show.location)
        if not folder:
            return False
        name = Episode.compile_file_name(None, file=f)
        if f.subs:
            f.location = os.path.join(SUB_DIR, name)
        else:
            f.location = os.path.join(folder, name)
        QUEUE.append(f)
    return show

def check_source(self, source, last_status, id, agent, thread, name, type):
    """
    Get the status of a profile; if the status has not changed, set check to 1.
    Ffmpeg: uses ffprobe to check the status of the profile (source) and returns a flag:
    0 = down, 1 = up, 2 = video error, 3 = audio error.
    """
    ffmpeg = Ffmpeg()
    check = ffmpeg.check_source(source)
    # print "%s : %s" % (check, last_status)
    self.logger.debug("Current: %s <> Last: %s" % (check, last_status))
    if check != last_status:
        json_data = """{"source":"%s","status":%s,"pa_id":%s,"agent": "%s","thread":%s,"name":"%s","type":"%s"}""" % (
            source, last_status, id, agent, thread, name, type)
        file = File()
        replicate = file.append_to_check_list(json_data)
        if not replicate:
            self.logger.info("Doubt current %s <> Last %s : %s" % (check, last_status, str(json_data)))

def test_db(self):
    aiven_results = 0
    try:
        psql_conn = Database(Config.PS_DATABASE_NAME)
        print("DB Connected!")
        query = """
            CREATE TABLE IF NOT EXISTS """ + Config.PS_TEST_WEBSITE_TABLE_NAME + """ (
                name varchar(255) NOT NULL,
                url varchar(255) NOT NULL,
                status_code integer NOT NULL,
                reason varchar(255) NOT NULL,
                response_time decimal NOT NULL,
                checked_at timestamp NOT NULL,
                pattern varchar(255),
                has_pattern boolean DEFAULT FALSE,
                PRIMARY KEY(url, checked_at)
            )
        """
        psql_conn.query(query)
        print("Table created successfully in PostgreSQL")
        query = "DELETE FROM " + Config.PS_TEST_WEBSITE_TABLE_NAME + " WHERE url = 'https://aiven.io'"
        psql_conn.query(query)
        prod, cons = app.run(Config.K_MONITOR_TOPIC, Config.PS_DATABASE_NAME,
                             Config.PS_TEST_WEBSITE_TABLE_NAME, "tests/t_monitor_db.yml")
        print("all 'aiven.io' is deleted from PostgreSQL")
        interval = File.read_time_interval("tests/t_monitor_corrupted.yml")
        time.sleep(interval - 1)
        app.stop_monitor(prod, cons)
        query = "SELECT * FROM " + Config.PS_TEST_WEBSITE_TABLE_NAME + " WHERE url = 'https://aiven.io'"
        cursor = psql_conn.query(query)
        aiven_results = cursor.fetchall()
        psql_conn = Database(Config.PS_DATABASE_NAME)
        query = "DROP TABLE " + Config.PS_TEST_WEBSITE_TABLE_NAME
        psql_conn.query(query)
        psql_conn.close()
    except Exception as error:
        print("Error while connecting to PostgreSQL", error)
    self.assertEqual(len(aiven_results), 1)

def queue_episode(file):
    name = Episode.compile_file_name(None, file=file)
    base_path = get_base_path(file)
    file.location = os.path.join(base_path, name)
    if file.subs:
        for sub in file.subs:
            QUEUE.append(File(old_location=sub,
                              series_name=file.series_name,
                              location=os.path.join(
                                  SUB_DIR,
                                  '{}.{}'.format(name.rsplit('.', 1)[0], sub.rsplit('.', 1)[1]))))
    QUEUE.append(file)
    return QUEUE

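# --- Illustrative sketch (not part of queue_episode above) ---
# How the subtitle target name is derived: the compiled episode name keeps its base,
# and the subtitle keeps its own extension. The example values are hypothetical.
name = 'Show - 01x02 - Title.mkv'
sub = '/downloads/Show.S01E02.eng.srt'
sub_name = '{}.{}'.format(name.rsplit('.', 1)[0], sub.rsplit('.', 1)[1])
# -> 'Show - 01x02 - Title.srt'
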
def decompress():
    '''Batch-decompress .fic files and return the decompressed images.'''
    if model.quality_level != 'medium':
        model.switch_quality_level('medium')
    # Get the uploaded file objects
    files = request.files.getlist('files')
    ret = []
    for rawfile in files:
        # Load the fic payload
        fic = File.load_binary(rawfile)
        file = File(rawfile)
        data = model.decode(feat=fic['feat'], tex=fic['tex'], intervals=fic['intervals'])
        # Get the full reconstructed image
        x_output = data['recon'] + data['resi_decoded']
        # Save the result image
        # file_name = file.name_suffix('fic', ext='.bmp')
        file_name = file.name_suffix('fic', ext=fic['ext'])
        file_path = get_path(file_name)
        save_image(x_output, file_path)
        # Result data to return
        result = {
            'name': file_name,
            'data': get_url(file_name),
            'size': path.getsize(file_path),
        }
        ret.append(result)
    # Build the response
    response = jsonify(ret)
    return response

def run_client(default_ssl_impl=True):
    client = Client(default_ssl_impl)
    client.start()
    salt = ''.join(['a' for _ in range(32)])
    # The key-derivation call below was redacted in the source ('******');
    # it derives `key` from the password and salt before building the SecretBox.
    password = input('Enter a password: '******'utf-8'), bytes(salt, encoding='utf-8'))
    secret_box = SecretBox(key)
    print('Ready to serve:')
    while client.connected:
        cmd = input('> ')
        tokens = cmd.split(' ')
        try:
            if len(cmd) == 0:
                client.disconnect()
            if tokens[0] == 'exit':
                client.exit()
            elif tokens[0] == 'send':
                fid = int(tokens[1])
                data = str(' '.join(tokens[2:]))
                if len(data) == 0:
                    print('No data provided.')
                    continue
                encrypted_data = secret_box.encrypt(bytes(data, encoding='utf-8'), encoder=HexEncoder)
                client.send_file(File(fid, encrypted_data.decode('utf-8')))
            elif tokens[0] == 'get':
                fid = int(tokens[1])
                result = client.request_file(fid)
                if result and result.file_id != fid:
                    print('Incorrect file received.')
                elif result:
                    decrypted = secret_box.decrypt(bytes(result.data, encoding='utf-8'), encoder=HexEncoder)
                    print(decrypted.decode('utf-8'))
                else:
                    print('No data received.')
            else:
                print('Command not recognized.')
        except ValueError:
            print('Incorrect argument type.')
        except IndexError:
            print('Incorrect number of arguments.')
    print('Disconnected')

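# --- Illustrative sketch (not part of run_client above) ---
# The PyNaCl SecretBox round-trip used by the 'send'/'get' commands, shown here with a
# random key instead of the password-derived key (that derivation is redacted in the source).
from nacl.secret import SecretBox
from nacl.encoding import HexEncoder
from nacl.utils import random as nacl_random

box = SecretBox(nacl_random(SecretBox.KEY_SIZE))
token = box.encrypt(b'hello', encoder=HexEncoder)          # hex-encoded nonce + ciphertext
assert box.decrypt(token, encoder=HexEncoder) == b'hello'  # decrypt restores the plaintext
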
def test_producer_equal_consumer(self):
    prod, cons = app.run(Config.K_MONITOR_TEST_TOPIC, Config.PS_DATABASE_NAME,
                         Config.PS_TEST_WEBSITE_TABLE_NAME)
    interval = File.read_time_interval()
    time.sleep(interval - 1)
    app.stop_monitor(prod, cons)
    admin_client = KafkaAdminClient(
        bootstrap_servers=[Config.K_HOST + ':' + Config.K_PORT],
        security_protocol=Config.K_SECURITY_PROTOCOL,
        ssl_cafile=Config.K_SSL_CAT_FILE,
        ssl_certfile=Config.K_SSL_CERT_FILE,
        ssl_keyfile=Config.K_SSL_KEY_FILE)
    admin_client.delete_topics([Config.K_MONITOR_TEST_TOPIC])
    self.assertEqual(prod.get_message_count(), cons.get_message_count())

def compress():
    '''Batch-compress images and return the compression results.'''
    if model.quality_level != 'medium':
        model.switch_quality_level('medium')
    # Get the uploaded file objects
    files = request.files.getlist('files')
    ret = []
    for rawfile in files:
        file = File(rawfile)
        # Convert the binary data to a tensor
        input = file.load_tensor().cuda()
        data = model.encode(input)
        # Save the compressed data
        fic_name = f'{file.name}.fic'
        fic_path = get_path(fic_name)
        File.save_binary(
            {
                'feat': data['feat'],
                'tex': data['tex'],
                'intervals': data['intervals'],
                'ext': file.ext,
            }, fic_path)
        fic_size = path.getsize(fic_path)
        # Get the size of the original image
        input_path = get_path(file.name_suffix('input', ext='.bmp'))
        save_image(input, input_path)
        input_size = path.getsize(input_path)
        fic_compression_ratio = fic_size / input_size
        # Result data to return
        result = {
            'name': fic_name,
            'data': get_url(fic_name),
            'size': fic_size,
            'compression_ratio': fic_compression_ratio,
        }
        ret.append(result)
    # Build the response
    response = jsonify(ret)
    return response

def main(args):
    global SHOWS
    parse_args(args)
    data = load_json(os.environ[CONF_FILE])
    save_json(data, 'data/syncer.json')
    SHOWS = load_shows()
    if SHOWS is None:
        save_json({'shows_locked': True}, os.environ[OUT_FILE])
        print('shows locked')
        return
    save_json(data, 'data/sync')
    files = []
    for f in data:
        f = File(old_location=os.path.join(FILE_DIR, f['location']),
                 sync=f['sync'],
                 s_nr=f['s_nr'],
                 e_nr=f['e_nr'],
                 series_name=f[SERIES_NAME],
                 title=f['title'],
                 title2=f['title2'],
                 title3=f['title3'],
                 episode_option=f['e_o']['s'],
                 override=f['override'],
                 delete=f['delete'],
                 subs=f['subs'],
                 type_option=f['t_o']['s'],
                 status=f['status_o']['s'],
                 new_series=f['new_series'],
                 name_needed=True if f['name_o']['s'] == 'Name required' else False,
                 tvdb_id=f['tvdb_id'] if not f['tvdb_id'] == 0 else 0,
                 anime=True if f['new_series'] and f['anime_o']['s'] == 'Anime: Yes' else False)
        if f.new_series:
            create_new_series(f)
        files.append(f)
    for file in files:
        if file.delete:
            QUEUE.append(file)
            continue
        if file.type_option == '[ignore]':
            ignore_file(file)
            continue
        if not file.sync:
            continue
        if file.type_option in ['HD', 'SD']:
            queue_movie(file)
            continue
        if file.type_option == 'Series':
            file.anime = SHOWS[file.series_name].anime
            queue_episode(file)
            continue
    sync_queue()
    clean_up()
    report = []
    for file in QUEUE:
        report.append(file.get_report())
    log = load_json(os.path.join(os.path.dirname(os.environ[OUT_FILE]), 'synclog'))
    if not log:
        log = []
    log.extend(report)
    save_json(report, os.environ[OUT_FILE])
    save_json(log, os.path.join(os.path.dirname(os.environ[OUT_FILE]), 'synclog'))
    print(json.dumps(report, indent=4, sort_keys=True))
    save_shows(SHOWS)

class SentenceParser:
    def __init__(self):
        self.file = File()
        self.stemmer = SnowballStemmer("english")
        logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

    def extract_feature_from_doc(self, line):
        features = []
        for (text, category, answer) in line:
            sent_features = self.extract_feature(text)
            # features.append((sent_features, category))
            features.append((sent_features, answer))
            # print(datetime.datetime.now())
            print('Features of Answer: ', (sent_features, answer))
            # print(datetime.datetime.now())
        return features

    def extract_feature(self, text):
        print("\nQUESTION: ", text)
        words = self.preprocess(text)
        print("After Preprocess: ", words)
        # YOUR CODE HERE
        # Tag words
        tagged_words = [nltk.pos_tag(word_tokenize(words))]
        # Extract keys
        keys = self.extract_keys(tagged_words)
        # Stemming
        stemmed_words = [self.stemmer.stem(key) for key in keys]
        return self.get_feature_set(stemmed_words)

    def preprocess(self, sentence):
        # YOUR CODE HERE
        # make all lower case
        sentence = sentence.lower()
        # tokenize / segment the words & remove punctuation
        tokenizer = RegexpTokenizer(r'\w+')
        tokens = tokenizer.tokenize(sentence)
        # remove stop words
        set_of_stopwords = set(stopwords.words('english'))
        filtered_words = [word for word in tokens if word not in set_of_stopwords]
        # join the words
        return " ".join(filtered_words)

    def extract_keys(self, sentences):
        sent_keys = []
        for sent in sentences:
            keys = [x for (x, n) in sent
                    if n in ('NN', 'NNS', 'VBN', 'VBP', 'RB', 'VBZ', 'VBG', 'PRP', 'JJ')]
            if len(keys) == 0:
                keys = [x for (x, n) in sent]
            sent_keys.extend(keys)
        return sent_keys

    def get_feature_set(self, sent_keys):
        return {'keywords': ' '.join(sent_keys)}

    def get_content(self, fileName):
        with self.file.read(get_resource(fileName)) as content_file:
            lines = csv.reader(content_file, delimiter='|')
            res = [x for x in lines if len(x) == 3]
        return res

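# --- Illustrative usage sketch (hypothetical question text) ---
# Running one question through the pipeline above: lower-case, strip stop words,
# POS-tag, keep noun/verb/adjective-like tokens, stem them, and join into a feature dict.
parser = SentenceParser()
features = parser.extract_feature('How do I reset my password?')
# e.g. -> {'keywords': 'reset password'}  (exact output depends on the installed NLTK models)
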
# reconstruct face by feature
b_layer = DeconvRecon().eval()
b_layer = b_layer.cuda()
b_layer = nn.DataParallel(b_layer).cuda()
b_param = torch.load('../params/deconv_recon/30w/baseLayer_7.pth', map_location='cuda:0')
b_layer.load_state_dict(b_param)

e_layer = GdnModel().eval().cuda()
e_layer = CustomDataParallel(e_layer).cuda()
c_param = torch.load('../params/gdn_model/5120/enhanceLayer_7.pth', map_location='cuda:0')
# c_param = torch.load('../params/gdn_model/1024/gdnmodel_10.pth', map_location='cuda:0')
e_layer.load_state_dict(c_param)

file_path = sys.argv[1]
file = File(file_path)
file.load_tensor()

with torch.no_grad():
    input = file.tensor
    input = input.cuda()
    feat = resnet(input)
    # process feat shape to [N, 512, 1, 1]
    feat = torch.squeeze(feat, 1)
    feat = torch.unsqueeze(feat, 2)
    feat = torch.unsqueeze(feat, 3)
    feat = feat.cuda()
    # reconstruct feature image
    recon = b_layer(feat)

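# --- Illustrative sketch (not part of the script above) ---
# The squeeze/unsqueeze calls above reshape the ResNet feature into [N, 512, 1, 1].
# Assuming the feature is already 2-D, an equivalent single-step reshape would be:
import torch
feat = torch.randn(4, 512)                   # hypothetical [N, 512] feature
feat_4d = feat.view(feat.size(0), -1, 1, 1)  # -> torch.Size([4, 512, 1, 1])
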
#!/usr/bin/env python
# encoding: utf-8
import sys

from utils.settings import Settings
from utils.words.dictionary import Dictionary
from utils.file import File
from utils.http.iltec import PortalLinguaPortuguesa

reload(sys)
sys.setdefaultencoding("utf-8")


def get_words():
    all_words = {}
    for dao, aao in PortalLinguaPortuguesa.get_words():
        words = Dictionary(dao, aao).include_variations()
        all_words.update(words)
    return all_words


if __name__ == '__main__':
    Settings.check()
    File('mappings.js').write(get_words())

#!/usr/bin/env python
# encoding: utf-8
import requests, re, argparse, time, sys

from utils.settings import Settings
from utils.words import Words
from utils.file import File

reload(sys)
sys.setdefaultencoding("utf-8")

ALPHABET = [chr(i) for i in range(97, 123)]


def get_words():
    all_words = {}
    for char in ALPHABET:
        try:
            if Settings.VERBOSE:
                print "\n>> Getting words starting with '%s'" % char.upper()
            words = Words.get(char).with_variants()
            all_words.update(words)
        except IndexError:
            continue
    return all_words


if __name__ == '__main__':
    Settings.check()
    File.write(get_words())

def __init__(self, data_filename): self.logger = logging.getLogger("Glados") self.file = File() self.parser = SentenceParser() self.data_filename = data_filename self.classifier = self.train_and_get_classifier(self.data_filename)
def demo_process():
    '''Provide the demo showcase: run one image through the full pipeline and return all results.'''
    # Get the uploaded file object
    file = request.files['file']
    file = File(file)
    feature_model = request.form['feature_model']
    quality_level = request.form['quality_level']
    if model.quality_level != quality_level:
        model.switch_quality_level(quality_level)
    # Convert the binary data to a tensor
    input = file.load_tensor().cuda()
    # Run the model and collect the results
    e_data = model.encode(input)
    d_data = model.decode(feat=e_data['feat'],
                          tex=e_data['tex'],
                          intervals=e_data['intervals'],
                          recon=e_data['recon'])
    data = {**e_data, **d_data}
    # Save the compressed data
    fic_path = get_path(f'{file.name}.fic')
    File.save_binary(
        {
            'feat': data['feat'],
            'tex': data['tex'],
            'intervals': data['intervals'],
            'ext': file.ext,
        }, fic_path)
    # fic metrics
    fic_size = path.getsize(fic_path)
    fic_bpp = get_bpp(fic_size)
    # Save the feature separately to measure feature vs. texture size
    feat_path = get_path(f'{file.name}_feat.fic')
    File.save_binary({
        'feat': data['feat'],
    }, feat_path)
    # feature metrics
    feat_size = path.getsize(feat_path)
    feat_bpp = get_bpp(feat_size)
    # texture metrics
    tex_size = fic_size - feat_size
    tex_bpp = get_bpp(tex_size)
    # Images to save
    imgs = {
        'input': data['input'],
        'recon': data['recon'],
        'resi': data['resi'],
        'resi_decoded': data['resi_decoded'],
        'resi_norm': data['resi_norm'],
        'resi_decoded_norm': data['resi_decoded_norm'],
        'output': data['output'],
    }
    # Save each image and collect its URL
    img_urls = {}
    for key, value in imgs.items():
        # Save the image
        file_name = file.name_suffix(key, ext='.bmp')
        file_path = get_path(file_name)
        save_image(value, file_path)
        # URL of the saved image
        img_urls[key] = get_url(file_name)
    # Compute the compression ratio
    input_name = file.name_suffix('input', ext='.bmp')
    input_path = get_path(input_name)
    input_size = path.getsize(input_path)
    fic_compression_ratio = fic_size / input_size
    # JPEG baseline for comparison
    jpeg_name = file.name_suffix('jpeg', ext='.jpg')
    jpeg_path = get_path(jpeg_name)
    dichotomy_compress(input_path, jpeg_path, target_size=tex_size)
    img_urls['jpeg'] = get_url(jpeg_name)
    # JPEG metrics
    jpeg_size = path.getsize(jpeg_path)
    jpeg_compression_ratio = jpeg_size / input_size
    jpeg_bpp = get_bpp(jpeg_size)
    # Other data
    input_arr = tensor_to_array(data['input'])
    output_arr = tensor_to_array(data['output'])
    jpeg_arr = load_image_array(jpeg_path)
    # Response payload
    ret = {
        'image': img_urls,
        'data': get_url(f'{file.name}.fic'),
        'eval': {
            'fic_bpp': fic_bpp,
            'feat_bpp': feat_bpp,
            'tex_bpp': tex_bpp,
            'jpeg_bpp': jpeg_bpp,
            'fic_compression_ratio': fic_compression_ratio,
            'jpeg_compression_ratio': jpeg_compression_ratio,
            'fic_psnr': psnr(input_arr, output_arr),
            'fic_ssim': ssim(input_arr, output_arr),
            'jpeg_psnr': psnr(input_arr, jpeg_arr),
            'jpeg_ssim': ssim(input_arr, jpeg_arr),
        },
        'size': {
            'fic': fic_size,
            'input': input_size,
            # 'output': fic_size,
            'output': tex_size,
            'feat': feat_size,
            'tex': tex_size,
            'jpeg': jpeg_size,
        }
    }
    # Build the response
    response = jsonify(ret)
    return response

def main(args):
    parse_args(args)
    conf = load_json(environ[CONF_FILE])
    save_json(conf, 'data/sync_prep.json')
    shows = load_shows(read_only=True)
    file_list = []
    subs = []
    for root, dirs, files in walk(FILE_DIR):
        for name in files:
            if '[ignore]' in root or '[ignore]' in name:
                continue
            extension = name.split('.')[-1].lower()
            if extension in EXTENSIONS:
                if 'sample' in name.lower():
                    continue
                file_list.append(File(old_location=path.join(root, name)))
            if extension in SUBS:
                subs.append({'text': name, 'value': path.join(root, name)})
    series_names_words = []
    series_names = []
    series_n = sorted(list(shows.keys()))
    for n in series_n:
        if shows[n].status == ENDED:
            continue
        n1 = clean_up(n)
        series_names_words.append(n1)
        series_names.append(n)
        n2 = n.replace('\'', '')
        n2 = n2.replace('.', '')
        n2 = n2.replace(',', '')
        n2 = clean_up(n2)
        if not set(n1) == set(n2):
            series_names.append(n)
            series_names_words.append(n2)
    print(series_names)
    if not conf['all']:
        remove_folders_with_multiple_files(file_list)
    i = 0
    for file in file_list:
        file.file_id = i
        i += 1
    regex = [
        {
            "e_end": 6,
            "e_start": 4,
            "regex": "s[0-9]{2}e[0-9]{2}",
            "s_end": 3,
            "s_start": 1,
        },
        {
            "e_end": 5,
            "e_start": 3,
            "regex": "[0-9]{2}x[0-9]{2}",
            "s_end": 2,
            "s_start": 0,
        },
    ]
    for file in file_list:
        location = file.old_location.lower()
        for reg in regex:
            pattern = re.compile(reg['regex'])
            match = re.findall(pattern, location)
            if match:
                try:
                    file.s_nr = int(match[0][reg['s_start']:reg['s_end']])
                    file.e_nr = int(match[0][reg['e_start']:reg['e_end']])
                    break
                except IndexError:
                    continue
        for name in series_names_words:
            if all(word in location for word in name):
                index = series_names_words.index(name)
                file.series_name = series_names[index]
    n_series = {}
    for n in list(shows.keys()):
        n_series[n] = {
            'tvdb_id': shows[n].tvdb_id,
            'name_needed': shows[n].name_needed
        }
    n_series['Series Name'] = 0
    json = {'shows': n_series, 'files': [], 'subs': subs}
    for file in file_list:
        json['files'].append(file.__str__())
    save_json(json, environ[OUT_FILE])

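# --- Illustrative sketch (not part of main above) ---
# How one entry of the regex table maps a filename to season/episode numbers; the slice
# indices pick the digits out of the matched token. The path below is hypothetical.
import re

location = '/downloads/some.show.s03e07.720p.mkv'.lower()
reg = {"regex": "s[0-9]{2}e[0-9]{2}", "s_start": 1, "s_end": 3, "e_start": 4, "e_end": 6}
match = re.findall(re.compile(reg['regex']), location)
if match:
    s_nr = int(match[0][reg['s_start']:reg['s_end']])  # 3
    e_nr = int(match[0][reg['e_start']:reg['e_end']])  # 7
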
class Glados(object):
    def __init__(self, data_filename):
        self.logger = logging.getLogger("Glados")
        self.file = File()
        self.parser = SentenceParser()
        self.data_filename = data_filename
        self.classifier = self.train_and_get_classifier(self.data_filename)

    """
    Public api
    input: user question text
    output: answer
    """

    def get_help(self, question):
        features = self.parser.extract_feature(question)
        print('features', features)
        answer = self.classifier.classify(features)
        prob = self.classifier.prob_classify(features).prob(answer)
        # self.logger.info('features for question are %s', features)
        print('Answer:', answer, "(", prob, ")")
        response = dict(question=question, answer=answer, probability=prob)
        return response

    def train_and_get_classifier(self, data_filename):
        split_ratio = 0.8
        data = self.parser.get_content(data_filename)
        data_set = self.parser.extract_feature_from_doc(data)
        random.shuffle(data_set)
        data_length = len(data)
        train_split = int(data_length * split_ratio)
        training_data = data_set[:train_split]
        self.error_analysis_for_text_input_to_feature_extraction(data)
        self.error_analysis_for_features_to_predicted_answer(training_data)
        test_data = data_set[train_split:]
        # self.logger.debug('\n'.join([str(x) for x in data_set]))
        classifier, classifier_name, test_set_accuracy, training_set_accuracy = self.train_using_naive_bayes(
            training_data, test_data)
        self.file.append(
            get_module_path("output/accuracy.txt"),
            "\n%s\t\t%s\t\t\t%.8f\t\t%.8f" % (classifier_name, data_length,
                                              training_set_accuracy, test_set_accuracy))
        return classifier

    def error_analysis_for_text_input_to_feature_extraction(self, data):
        text_features_answer = []
        for (text, category, answer) in data:
            sent_features = self.parser.extract_feature(text)
            text_features_answer.append((text, sent_features, answer))
            # logging.debug(datetime.datetime.now())
        # print('>>>>>>>>TEXT>>>>>>>>>FEATURES>>>>>>ANSWER:\n ', str(json.dumps(text_features_answer)))

    def error_analysis_for_features_to_predicted_answer(self, test_data):
        classifier = nltk.NaiveBayesClassifier.train(test_data)
        classifier.show_most_informative_features()
        errors = []
        for (feature, actual_output) in test_data:
            guess = classifier.classify(feature)
            if guess != actual_output:
                errors.append((feature, actual_output, guess))
        print('>>>>>FEATURE>>>>>>>>>ACTUAL OUTPUT<<<<<<<<PREDICTION<<<<<<<<<<<<<<<')
        print('Errors:', json.dumps(errors))

    def train_using_naive_bayes(self, training_data, test_data):
        classifier = nltk.NaiveBayesClassifier.train(training_data)
        classifier_name = type(classifier).__name__
        training_set_accuracy = nltk.classify.accuracy(classifier, training_data)
        test_set_accuracy = nltk.classify.accuracy(classifier, test_data)
        return classifier, classifier_name, test_set_accuracy, training_set_accuracy