def backup_skinshortcuts_properties(propertiesfile, dest_path):
    '''parse skinshortcuts properties file and translate images'''
    # look for any backgrounds and translate them
    propfile = xbmcvfs.File(propertiesfile)
    data = propfile.read()
    propfile.close()
    # literal_eval is a safer drop-in for eval() on repr'd python data (needs `import ast`)
    allprops = ast.literal_eval(data) if data else []
    for count, prop in enumerate(allprops):
        if prop[2] == "background":
            background = prop[3] if prop[3] else ""
            defaultid = prop[1]
            if background.endswith((".jpg", ".png", ".gif")):
                background = get_clean_image(background)
                extension = background.split(".")[-1]
                newthumb = os.path.join(
                    dest_path, "%s-background-%s.%s"
                    % (xbmc.getSkinDir(), normalize_string(defaultid), extension))
                newthumb_vfs = ("special://profile/addon_data/script.skinshortcuts/"
                                "%s-background-%s.%s"
                                % (xbmc.getSkinDir(), normalize_string(defaultid), extension))
                if xbmcvfs.exists(background):
                    xbmcvfs.copy(background, newthumb)
                    allprops[count] = [prop[0], prop[1], prop[2], newthumb_vfs]
    # write updated properties file
    propfile = xbmcvfs.File(propertiesfile, "w")
    propfile.write(repr(allprops))
    propfile.close()
def __init__(self):
    self.data = []
    self.dictionary = Dictionary()
    self.max_sent_len = 0
    # Read the positive reviews
    with open(POSITIVE_REVIEWS_FILE, encoding='utf-8') as f:
        positive_reviews = f.readlines()
    for review in positive_reviews:
        review = normalize_string(review)
        review_words = word_tokenize(review)
        self.dictionary.add_words(review_words)
        self.data.append((review, 1))
        # +2 presumably reserves room for SOS/EOS tokens added at encoding time
        self.max_sent_len = max(self.max_sent_len, 2 + len(review_words))
    # Read the negative reviews
    with open(NEGATIVE_REVIEWS_FILE, encoding='utf-8') as f:
        negative_reviews = f.readlines()
    for review in negative_reviews:
        review = normalize_string(review)
        review_words = word_tokenize(review)
        self.dictionary.add_words(review_words)
        self.data.append((review, 0))
        self.max_sent_len = max(self.max_sent_len, 2 + len(review_words))
    # Split the original dataset into train/test
    random.shuffle(self.data)
    split_index = int(0.9 * len(self.data))
    self.train = AugmentedList(self.data[:split_index])
    self.test = AugmentedList(self.data[split_index:])
def is_another_service(position):
    # normalize once instead of once per substring test
    position = normalize_string(position)
    return ('uber air' in position or 'uberair' in position
            or 'freight' in position or 'elevate' in position)
def main():
    args = parse_args()
    global labels
    with open(args.labels) as label_file:
        labels = json.load(label_file)
    df_service = pd.read_csv(args.service_output, delimiter='\t').set_index('file_name')
    df_utt = pd.read_csv(args.utterances, delimiter='\t')
    final_utterances = df_utt['file_name'].to_list()
    df_utt.set_index('file_name', inplace=True)
    smoothen_val = 1e-20  # unused in this snippet
    ds2_logits = args.ds2_probs
    data = np.load(ds2_logits, allow_pickle=True)

    print("Getting ds2 transcripts")
    utterances, probs_list = zip(*data)
    ds2_transcripts = ctc_beam_decode(probs_list, labels, args.lm_path,
                                      labels.index('_'), args.lm_alpha,
                                      args.lm_beta, args.beam_size)
    ds2_data = list(zip(utterances, probs_list, ds2_transcripts))

    print("Getting ds2 word level confidence values")
    with Pool(multiprocessing.cpu_count()) as pool:
        ds2_word_confs = list(
            tqdm(pool.imap(get_word_confidence, ds2_data), total=len(ds2_data)))

    with open(args.output_path, 'w') as fd:
        fd.write('fname\treference\tservice_transcript\tservice_confs\t'
                 'ds2_transcript\tds2_confs\n')
        for fname in final_utterances:
            ref = normalize_string(df_utt.loc[fname]['transcript'], labels[1:])
            service_transcript, service_conf = df_service.loc[fname][
                ['transcript', 'word_confs']]
            norm_service_transcript = normalize_string(service_transcript, labels[1:])
            aligned_service_conf = align_word_confs(service_transcript,
                                                    norm_service_transcript,
                                                    service_conf)
            aligned_service_conf = ' '.join(aligned_service_conf)
            # norm_service_transcript, aligned_service_conf = parse_text(service_transcript, service_conf)
            ds2_conf = ds2_word_confs[utterances.index(fname)]
            ds2_conf = ' '.join([str(x) for x in ds2_conf])
            ds2_transcript = ds2_transcripts[utterances.index(fname)]
            fd.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(
                fname, ref, norm_service_transcript,
                aligned_service_conf, ds2_transcript, ds2_conf))
def main(): parser = argparse.ArgumentParser( description="Generate dataset for FineMerge") parser.add_argument( "--ds2_probs", help= "Path to frame-level token probs pkl file obtained from final layer of ds2," "should be list of tuples containing (file_names, probs)", type=str, required=True, ) parser.add_argument( "--service_output", help="Path to ASR service output", type=str, required=True, ) parser.add_argument( "--labels", help= "Path to labels json files containing ordered (w.r.t. to ds2 output) list of output labels", type=str, required=False, default='../labels_char.json') parser.add_argument( "--output_path", help="Path to save the output pickle file", type=str, required=True, ) args = parser.parse_args() with open(args.labels) as label_file: labels = json.load(label_file) service_map = {} with open(args.service_output) as fd: lines = fd.read().splitlines() for _, line in tqdm(enumerate(lines[1:]), total=len(lines[1:])): file_name, transcript, word_confs = line.split('\t') norm_transcript = normalize_string(transcript, labels[1:]) aligned_word_confs = align_word_confs(transcript, norm_transcript, word_confs) # norm_transcript, aligned_word_confs = parse_text(transcript, word_confs) aligned_word_confs = [float(conf) for conf in aligned_word_confs] service_map[file_name] = {'service_transcript' : norm_transcript, \ 'word_confs' : aligned_word_confs} dataset = {} data = np.load(args.ds2_probs, allow_pickle=True) for file_name, probs in data: if file_name in service_map.keys(): dataset[file_name] = service_map[file_name] dataset[file_name]['ds2_probs'] = probs with open(args.output_path, 'wb') as fd: pickle.dump(dataset, fd)
def get_custom_path(self, searchtitle, title):
    """locate custom folder on disk as pvrart location"""
    title_path = ""
    custom_path = self._mutils.addon.getSetting("pvr_art_custom_path")
    if custom_path and self._mutils.addon.getSetting("pvr_art_custom") == "true":
        delim = "\\" if "\\" in custom_path else "/"
        dirs = xbmcvfs.listdir(custom_path)[0]
        # try increasingly fuzzy matches against the existing folder names
        for strictness in [1, 0.95, 0.9, 0.8]:
            if title_path:
                break
            for directory in dirs:
                if title_path:
                    break
                if sys.version_info.major < 3:
                    directory = directory.decode("utf-8")
                curpath = os.path.join(custom_path, directory) + delim
                for item in [title, searchtitle]:
                    match = SM(None, item, directory).ratio()
                    if match >= strictness:
                        title_path = curpath
                        break
        if not title_path and self._mutils.addon.getSetting("pvr_art_download") == "true":
            title_path = os.path.join(custom_path, normalize_string(title)) + delim
    return title_path
def stat_lang(findings_file):
    with open(findings_file, 'r') as f:
        findings = json.load(f)
    sent_max_num = 0
    word_max_num = 0
    sent_max_report = None
    word_max_report = None
    for item in findings:
        report = item['report']
        caption = normalize_string(report)
        caption = [sent.strip() for sent in caption.split(' .')
                   if len(sent.strip()) > 0]
        if sent_max_num < len(caption):
            sent_max_num = len(caption)
            sent_max_report = caption
        for sent in caption:
            words = sent.split()
            if word_max_num < len(words):
                word_max_num = len(words)
                word_max_report = sent
    print('max sentence number is {} and max words num is {}'.format(
        sent_max_num, word_max_num))
    print(sent_max_report)
    print(word_max_report)
def main():
    args = parse_args()
    global saved_output, references, labels
    with open(args.labels) as label_file:
        labels = json.load(label_file)
    saved_output = dict(np.load(args.saved_output, allow_pickle=True))
    df_utt = pd.read_csv(args.utterances, delimiter='\t')
    utterances = df_utt['file_name'].to_list()
    references = df_utt['transcript'].to_list()
    references = [normalize_string(text, labels[1:]) for text in references]
    saved_output = [saved_output[utt] for utt in utterances]

    p = Pool(multiprocessing.cpu_count(), init, [labels, args.lm_path, args.beam_width])
    cand_alphas = np.linspace(args.lm_alpha_from, args.lm_alpha_to, args.lm_num_alphas)
    cand_betas = np.linspace(args.lm_beta_from, args.lm_beta_to, args.lm_num_betas)
    params_grid = [(float(alpha), float(beta)) for alpha in cand_alphas
                   for beta in cand_betas]
    scores = []
    for params in tqdm(p.imap(decode_dataset, params_grid), total=len(params_grid)):
        scores.append(list(params))
    print("Saving tuning results to {}.".format(args.output_path))
    with open(args.output_path, "w") as fh:
        json.dump(scores, fh)
    min_results = min(scores, key=lambda x: x[2])
    print("Best Params:\nAlpha: %f \nBeta: %f \nWER: %f" % tuple(min_results))
def parse_row(self, row):
    text = normalize_string(row.text)
    sibling = row.getnext()
    if 'name' in text:
        self.name = normalize_string(sibling.text)
    elif 'age' in text and len(text) <= 4:
        self.age = sibling.text
    elif 'nationality' in text:
        nationality = sibling.find('img').get('alt')
        self.nationality = normalize_string(nationality)
    elif 'formation' in text:
        self.preferred_formation = sibling.text
    elif 'success rate' in text:
        self.win_percentage = (sibling
                               .xpath(queries["win_percentage"])[0]
                               .text
                               .replace(',', '.'))
def read_words(filename: str, max_length: int) -> List[Word]:
    words: List[Word] = list()
    with open(filename, mode='rt', encoding='utf8') as f:
        for line in f:  # type: str
            for word in normalize_string(line).strip().split():  # type: str
                if len(word) <= max_length:
                    words.append(Word(list(word)))
    return words
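# A minimal usage sketch for read_words above ('words.txt' is a hypothetical
# file; Word and normalize_string are assumed to be defined in this module):
if __name__ == '__main__':
    sample = read_words('words.txt', max_length=20)
    print('kept {} words of length <= 20'.format(len(sample)))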
def main(): parser = argparse.ArgumentParser( description= "create pickle file containing the topN transcipts for each utterance from google API" ) parser.add_argument( "--json_dir", help="Path to dir containing output json from Google API", type=str, required=True, ) parser.add_argument("--labels", help="Path to ds2 output labels", type=str, required=False, default='../labels_char.json') parser.add_argument( "--output_path", help="Path to save output pickle file", type=str, required=True, ) args = parser.parse_args() dataset = {} json_files = glob.glob(join(args.json_dir, '*.json')) with open(args.labels) as label_file: labels = json.load(label_file) for json_pth in json_files: file_name = basename(json_pth).replace('.json', '.wav') try: with open(json_pth) as fd: data = json.load(fd) except: print(json_pth) continue for rank, alt in enumerate(data['results'][0]['alternatives']): transcript = normalize_string(alt['transcript'], labels[1:]) # transcript = parse_text(alt['transcript']) confidence = alt['confidence'] if rank == 0: dataset[file_name] = { 'transcripts': [transcript], 'confidences': [confidence] } else: dataset[file_name]['transcripts'].append(transcript) dataset[file_name]['confidences'].append(confidence) with open(args.output_path, 'wb') as fd: pickle.dump(dataset, fd)
def parse_info(self):
    rows = self.get_info_table().findall('tr')
    rows = map(lambda row: row.find('th'), rows)
    for row in rows:
        self.parse_row(row)
    if not self.name:
        # fall back to the itemprop markup; xpath returns a (possibly empty) list,
        # so test for emptiness rather than comparing against '' or None
        name_attempt = self.tree.xpath("//div[@itemprop='name']/h1")
        if not name_attempt:
            raise Exception("manager parsing error")
        self.name = normalize_string(name_attempt[0].text)
def backup_skinshortcuts_images(shortcutfile, dest_path):
    '''parse skinshortcuts file and copy images to backup location'''
    shortcutfile = xbmc.translatePath(shortcutfile).decode("utf-8")
    doc = parse(shortcutfile)
    listing = doc.documentElement.getElementsByTagName('shortcut')
    for shortcut in listing:
        defaultid = shortcut.getElementsByTagName('defaultID')
        if defaultid:
            defaultid = defaultid[0].firstChild
        if defaultid:
            defaultid = defaultid.data
        if not defaultid:
            defaultid = shortcut.getElementsByTagName('label')[0].firstChild.data
        thumb = shortcut.getElementsByTagName('thumb')
        if thumb:
            thumb = thumb[0].firstChild
        if thumb:
            thumb = thumb.data
        if thumb and (thumb.endswith(".jpg") or thumb.endswith(".png")
                      or thumb.endswith(".gif")):
            thumb = get_clean_image(thumb)
            extension = thumb.split(".")[-1]
            newthumb = os.path.join(
                dest_path, "%s-thumb-%s.%s"
                % (xbmc.getSkinDir(), normalize_string(defaultid), extension))
            newthumb_vfs = ("special://profile/addon_data/script.skinshortcuts/"
                            "%s-thumb-%s.%s"
                            % (xbmc.getSkinDir(), normalize_string(defaultid), extension))
            if xbmcvfs.exists(thumb):
                xbmcvfs.copy(thumb, newthumb)
                shortcut.getElementsByTagName('thumb')[0].firstChild.data = newthumb_vfs
    # write changes to skinshortcuts file
    shortcuts_file = xbmcvfs.File(shortcutfile, "w")
    shortcuts_file.write(doc.toxml(encoding='utf-8'))
    shortcuts_file.close()
def chat():
    req_data = request.get_json()
    message = utils.normalize_string(req_data['message'])
    try:
        indices = utils.get_batched_indices(message)
    except KeyError:
        reply = "I did not understand your language!!, check the spelling perhaps"
    else:
        numpy_array = utils.list2numpy(indices)
        reply = redis_db.process(numpy_array)
    resp = jsonify(reply=reply)
    return resp
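# A hedged client-side sketch for the chat handler above; the URL and route
# are assumptions (they depend on how the Flask app registers this view):
import requests

def send_chat_message(message, url='http://localhost:5000/chat'):
    """POST a message to the chat endpoint and return the bot's reply."""
    resp = requests.post(url, json={'message': message})
    return resp.json()['reply']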
def next_batch(self, batch_size, mode=TRAIN_MODE):
    review_lengths, reviews, targets = [], [], []
    data = self.train if mode == TRAIN_MODE else self.test
    batch = data.next_items(batch_size)
    for (review, target) in batch:
        review_length = len(word_tokenize(normalize_string(review)))
        review = indexes_from_sentence(review, self.dictionary, self.max_sent_len)
        target = one_hot_encoding(2, target)
        reviews.append(review)
        targets.append(target)
        review_lengths.append(review_length)
    return review_lengths, reviews, targets
def clean_data(mylist):
    """
    Removes non-relevant entries
    :param mylist: list of dict
    :return: a list of dict
    """
    new_list = []
    for employee in mylist:
        name = employee['name']
        position = employee['position']
        # anonymous profiles show up as "LinkedIn Member"; skip them
        if normalize_string(name) == 'linkedin member':
            continue
        if Conditions.meet_conditions(position):
            new_list.append(employee)
    return new_list
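# A small sketch of clean_data on made-up records (field names match what the
# function reads; Conditions.meet_conditions is assumed from this module):
def _clean_data_demo():
    sample = [
        {'name': 'LinkedIn Member', 'position': 'Driver at Uber'},
        {'name': 'Jane Doe', 'position': 'Software Engineer at Uber'},
    ]
    # the first entry is dropped by name; the second is kept only if
    # Conditions.meet_conditions() accepts its position
    return clean_data(sample)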
def main():
    global data, utterances, references, labels, args
    args = parse_args()
    with open(args.labels) as label_file:
        labels = json.load(label_file)
    data = np.load(args.data, allow_pickle=True)
    df_utt = pd.read_csv(args.utterances, delimiter='\t')
    df_utt = df_utt[df_utt['file_name'].isin(data.keys())]  # TODO
    utterances = df_utt['file_name'].to_list()
    references = df_utt['transcript'].to_list()
    references = [normalize_string(text, labels[1:]) for text in references]

    print("Computing ctc alignments for service transcripts...")
    with Pool(multiprocessing.cpu_count()) as pool:
        alignments = list(
            tqdm(pool.imap(get_alignments, utterances), total=len(utterances)))
    for utt, alignment in zip(utterances, alignments):
        data[utt]['alignment'] = alignment

    cand_ths = np.linspace(args.th_from, args.th_to, args.num_th)
    cand_service_weights = np.linspace(args.service_weight_from,
                                       args.service_weight_to,
                                       args.num_service_weights)
    cand_blank_confs = np.linspace(args.blank_conf_from,
                                   args.blank_conf_to,
                                   args.num_blank_confs)
    params_grid = [(th, service_weight, blank_conf)
                   for th in cand_ths
                   for service_weight in cand_service_weights
                   for blank_conf in cand_blank_confs]
    scores = []
    with Pool(multiprocessing.cpu_count()) as pool:
        for params in tqdm(pool.imap(merge_transcripts, params_grid),
                           total=len(params_grid)):
            scores.append(list(params))
    print("Saving tuning results to {}.".format(args.output_path))
    with open(args.output_path, "w") as fh:
        json.dump(scores, fh)
    min_results = min(scores, key=lambda x: x[-1])
    print("Best Params:\nThreshold: %.12f \nService Weight: %.2f"
          " \nBlank Conf %.2f \nWER: %.6f" % tuple(min_results))
def getHeader(self, authenticate=True):
    clientInfo = self.clientInfo
    deviceName = clientInfo.getDeviceName()
    deviceName = utils.normalize_string(deviceName.encode('utf-8'))
    deviceId = clientInfo.getDeviceId()
    version = clientInfo.getVersion()

    if not authenticate:
        # If user is not authenticated
        auth = ('MediaBrowser Client="Kodi", Device="%s", DeviceId="%s", Version="%s"'
                % (deviceName, deviceId, version))
        header = {
            'Content-type': 'application/json',
            'Accept-encoding': 'gzip',
            'Accept-Charset': 'UTF-8,*',
            'Authorization': auth
        }
        self.logMsg("Header: %s" % header, 2)
    else:
        userId = self.userId
        token = self.token
        # Attached to the requests session
        auth = ('MediaBrowser UserId="%s", Client="Kodi", Device="%s", '
                'DeviceId="%s", Version="%s"'
                % (userId, deviceName, deviceId, version))
        header = {
            'Content-type': 'application/json',
            'Accept-encoding': 'gzip',
            'Accept-Charset': 'UTF-8,*',
            'Authorization': auth,
            'X-MediaBrowser-Token': token
        }
        self.logMsg("Header: %s" % header, 2)
    return header
def word_frequency(findings_file):
    with open(findings_file, 'r') as f:
        findings = json.load(f)
    word2count = {}
    for item in findings:
        caption = item['report'].strip().lower()
        caption = normalize_string(caption)
        caption = [sent.strip() for sent in caption.split(' .')
                   if len(sent.strip()) > 0]
        for sent in caption:
            for word in sent.split():
                if not re.match(r'^[a-zA-Z]+$', word):
                    continue
                word2count[word] = word2count.get(word, 0) + 1
    total = sum(word2count.values())
    word_frequency = [{'word': word,
                       'count': count,
                       'frequency': count * 1.0 / total}
                      for word, count in word2count.items()]
    word_frequency.sort(key=lambda x: x['count'], reverse=True)
    frequency_sum = 0.0
    for idx, item in enumerate(word_frequency):
        frequency_sum = frequency_sum + item['frequency']
        if frequency_sum > 0.99:
            print('top {} words cover 99% of occurrences'.format(idx + 1))
            break
    word_frequency = pd.DataFrame(word_frequency)
    word_frequency.to_csv('../output/preprocess/IU_Chest_XRay/words.csv', index=False)
def caption_preprocess(self, report):
    caption = normalize_string(report)
    caption = [sent.strip() for sent in caption.split(' .')
               if len(sent.strip()) > 0]
    sent_embds = []
    sent_length = []
    sent_num = min(len(caption), MAX_SENT)
    # truncate overly long captions and overly long sentences
    for i_sent in range(MAX_SENT):
        if i_sent >= len(caption):
            # pad missing sentences entirely with EOS
            sent_embds.append([self.word2idx['EOS']] * (MAX_WORDS + 1))
            sent_length.append(0)
            continue
        sent = caption[i_sent]
        words = sent.split()
        length = min(len(words), MAX_WORDS)
        sent_length.append(length)
        sent_embd = []
        for i_word in range(MAX_WORDS + 1):
            if i_word >= len(words):
                sent_embd.append(self.word2idx['EOS'])
            else:
                word = words[i_word]
                sent_embd.append(self.word2idx.get(word, self.word2idx['UNK']))
        sent_embds.append(sent_embd)
    return sent_embds, sent_num, sent_length
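# Shape sketch for caption_preprocess above: regardless of the input report,
# it returns MAX_SENT rows of MAX_WORDS + 1 token ids (EOS-padded), together
# with the true sentence count and per-sentence lengths, e.g.:
#
#   embds, n_sents, lens = self.caption_preprocess(report)
#   assert len(embds) == MAX_SENT
#   assert all(len(row) == MAX_WORDS + 1 for row in embds)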
def get_custom_path(self, searchtitle, title):
    '''locate custom folder on disk as pvrart location'''
    title_path = ""
    custom_path = self.metadatautils.addon.getSetting("pvr_art_custom_path")
    if custom_path and self.metadatautils.addon.getSetting("pvr_art_custom") == "true":
        delim = "\\" if "\\" in custom_path else "/"
        dirs = xbmcvfs.listdir(custom_path)[0]
        for strictness in [1, 0.95, 0.9, 0.8]:
            if title_path:
                break
            for directory in dirs:
                if title_path:
                    break
                directory = directory.decode("utf-8")
                curpath = os.path.join(custom_path, directory) + delim
                for item in [title, searchtitle]:
                    match = SM(None, item, directory).ratio()
                    if match >= strictness:
                        title_path = curpath
                        break
        if not title_path and self.metadatautils.addon.getSetting("pvr_art_download") == "true":
            title_path = os.path.join(custom_path, normalize_string(title)) + delim
    return title_path
def create_colortheme(self):
    '''create a colortheme from current skin color settings'''
    try:
        current_skinfont = None
        json_response = kodi_json("Settings.GetSettingValue",
                                  {"setting": "lookandfeel.font"})
        if json_response:
            current_skinfont = json_response
        current_skincolors = None
        json_response = kodi_json("Settings.GetSettingValue",
                                  {"setting": "lookandfeel.skincolors"})
        if json_response:
            current_skincolors = json_response

        # user has to enter name for the theme
        themename = xbmcgui.Dialog().input(
            self.addon.getLocalizedString(32023),
            type=xbmcgui.INPUT_ALPHANUM).decode("utf-8")
        if not themename:
            return

        xbmc.executebuiltin("ActivateWindow(busydialog)")
        xbmc.executebuiltin("Skin.SetString(SkinHelper.LastColorTheme,%s)"
                            % themename.encode("utf-8"))

        # add screenshot
        custom_thumbnail = xbmcgui.Dialog().browse(
            2, self.addon.getLocalizedString(32024), 'files')
        if custom_thumbnail:
            xbmcvfs.copy(custom_thumbnail, self.userthemes_path + themename + ".jpg")

        # read the guisettings file to get all skin settings
        from backuprestore import BackupRestore
        skinsettingslist = BackupRestore().get_skinsettings(
            ["color", "opacity", "texture", "panel", "colour",
             "background", "image"])
        newlist = []
        if skinsettingslist:
            newlist.append(("THEMENAME", themename))
            newlist.append(("DESCRIPTION", self.addon.getLocalizedString(32025)))
            newlist.append(("SKINTHEME", xbmc.getInfoLabel("Skin.CurrentTheme")))
            newlist.append(("SKINFONT", current_skinfont))
            newlist.append(("SKINCOLORS", current_skincolors))
            # look for any images in the skin settings and translate them so they can
            # be included in the theme backup
            for skinsetting in skinsettingslist:
                setting_type = skinsetting[0]
                setting_name = skinsetting[1]
                setting_value = skinsetting[2]
                if setting_type == "string" and setting_value:
                    if ((setting_value.endswith(".png")
                         or setting_value.endswith(".gif")
                         or setting_value.endswith(".jpg"))
                            and "resource://" not in setting_value):
                        image = get_clean_image(setting_value)
                        extension = image.split(".")[-1]
                        newimage = "%s_%s.%s" % (
                            themename, normalize_string(setting_name), extension)
                        newimage_path = self.userthemes_path + newimage
                        if xbmcvfs.exists(image):
                            xbmcvfs.copy(image, newimage_path)
                            skinsetting = (setting_type, setting_name, newimage_path)
                newlist.append(skinsetting)

        # save guisettings
        text_file_path = self.userthemes_path + themename + ".theme"
        text_file = xbmcvfs.File(text_file_path, "w")
        text_file.write(repr(newlist))
        text_file.close()

        xbmc.executebuiltin("Dialog.Close(busydialog)")
        xbmcgui.Dialog().ok(self.addon.getLocalizedString(32026),
                            self.addon.getLocalizedString(32027))
    except Exception as exc:
        xbmc.executebuiltin("Dialog.Close(busydialog)")
        log_exception(__name__, exc)
        xbmcgui.Dialog().ok(self.addon.getLocalizedString(32028),
                            self.addon.getLocalizedString(32030), str(exc))
def is_former(position):
    position = normalize_string(position)
    return 'uber' not in position or 'ex-uber' in position
def seq2words(sequences_list):
    normalized_seq_list = [utils.normalize_string(seq) for seq in sequences_list]
    token_seq_list = [seq.split() for seq in normalized_seq_list]
    return token_seq_list
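# A quick sketch of seq2words above; the exact tokens depend on what
# utils.normalize_string does to case and punctuation:
#
#   seq2words(['Hello, World!', 'How are you?'])
#   # -> e.g. [['hello', 'world', '!'], ['how', 'are', 'you', '?']]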
def main():
    user_input = 'Who cares?'
    sentence = utils.normalize_string(user_input)
    output_words, decoder_attn = evaluate(sentence)
    output_sentence = ' '.join(output_words)
    print("Sentence: {}\nTranslated Sentence: {}".format(user_input, output_sentence))
def is_driver(position):
    position = normalize_string(position)
    return 'driver' in position or 'motorista' in position
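# A small sketch exercising the position predicates above on made-up titles
# (assumes normalize_string lowercases its input, as elsewhere in this module):
def _predicates_demo():
    for title in ['Driver at Uber', 'Ex-Uber Engineer', 'Uber Freight Ops']:
        print(title, is_driver(title), is_former(title), is_another_service(title))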
def main(): parser = argparse.ArgumentParser( description="Generate binary n-gram lm file given a text file" ) parser.add_argument( "--text_file", help="Path to text file to train the lm from", type=str, required=True, ) parser.add_argument( "--order", help="order of lm to train", type=int, required=False, default=3, ) parser.add_argument( "--exclude_text", help="Path to text file whose sentences must be excluded from training", type=str, required=True, ) parser.add_argument( "--kenlm_dir", help="Path to the kenlm directory required for training n-gram lm", type=str, required=False, default='~/exp/kenlm', ) parser.add_argument( "--labels", help="Path to char level tokens for parsing", type=str, required=True, ) parser.add_argument( "--lm_path", help="Path to the save the trained lm to", type=str, required=True, ) args = parser.parse_args() print("Preparing text for training the lm....") with open(args.labels) as label_file: labels = json.load(label_file) with open(args.text_file) as fd: lm_text = fd.read().splitlines() lm_text = set([normalize_string(sentence, labels[1:]) for sentence in lm_text]) with open(args.exclude_text) as fd: text_to_exclude = fd.read().splitlines() text_to_exclude = set([normalize_string(sentence, labels[1:]) for \ sentence in text_to_exclude]) lm_text_final = lm_text - text_to_exclude with open('lm_text.txt', 'w') as fd: fd.write('\n'.join(lm_text_final)) print('Build the arpa lm file of order {} ....'.format(args.order)) command = '{} -o {} < lm_text.txt > lm.arpa'.format( os.path.join(args.kenlm_dir, 'build/bin/lmplz'), args.order) process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True) process.communicate() command = '{} lm.arpa {}'.format( os.path.join(args.kenlm_dir, 'build/bin/build_binary'), args.lm_path) process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True) process.communicate() os.remove('lm_text.txt') os.remove('lm.arpa')
def check(self, name, release_year):
    years = self.name_to_years[utils.normalize_string(name)]
    for year in years:
        if int(release_year) > year:
            return True
    return False
def __init__(self):
    df = pandas.read_csv(pathmng.wiki_best_actor_director_path)
    for index, row in df.iterrows():
        self.name_to_years[utils.normalize_string(row["name"])].add(row.year)
import json
from collections import defaultdict

import pandas
from pyspark.sql.functions import udf  # the udf() call below suggests a pyspark context

import config
import pathmng
import utils
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from crawl.imdb import lazyCrawl

rotten_path = "file:///" + config.crawl_data_path + "\\rotten.csv"
cleaned_rotten_path = "file:///" + config.crawl_data_path + "\\rotten_cleaned\\"
imdb_path = "file:///" + config.crawl_data_path + "\\imdb.csv"
cleaned_imdb_path = "file:///" + config.crawl_data_path + "\\imdb_cleaned.csv"

normalize_title_func = udf(lambda x: utils.normalize_string(x))


class AwardsCheckExist:
    name_to_years = defaultdict(lambda: set())
    _singleton = None

    def __init__(self):
        df = pandas.read_csv(pathmng.wiki_best_actor_director_path)
        for index, row in df.iterrows():
            self.name_to_years[utils.normalize_string(row["name"])].add(row.year)

    def check(self, name, release_year):
        years = self.name_to_years[utils.normalize_string(name)]
        for year in years:
            if int(release_year) > year:
                return True
        return False
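# A hedged usage sketch for AwardsCheckExist above; the CSV path and its
# 'name'/'year' columns come from pathmng, which this snippet assumes exists:
def _awards_demo():
    checker = AwardsCheckExist()
    # True if the normalized name has an award year before the release year
    return checker.check('Some Director', 2005)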
def getThemeMedia():

    doUtils = downloadutils.DownloadUtils()
    dialog = xbmcgui.Dialog()
    playback = None

    # Choose playback method
    resp = dialog.select(lang(33072), [lang(30165), lang(33071)])
    if resp == 0:
        playback = "DirectPlay"
    elif resp == 1:
        playback = "DirectStream"
    else:
        return

    library = xbmc.translatePath(
        "special://profile/addon_data/emby.for.kodi/library/").decode('utf-8')
    # Create library directory
    if not xbmcvfs.exists(library):
        xbmcvfs.mkdir(library)

    # Set custom path for user
    if xbmc.getCondVisibility('System.HasAddon(script.tvtunes)'):
        tvtunes = xbmcaddon.Addon(id="script.tvtunes")
        tvtunes.setSetting('custom_path_enable', "true")
        tvtunes.setSetting('custom_path', library)
        log.info("TV Tunes custom path is enabled and set.")
    else:
        # if it does not exist this will not work so warn user
        # often they need to edit the settings first for it to be created.
        dialog.ok(heading=lang(29999), line1=lang(33073))
        xbmc.executebuiltin('Addon.OpenSettings(script.tvtunes)')
        return

    # Get every user view Id
    with database.DatabaseConn('emby') as cursor:
        emby_db = embydb.Embydb_Functions(cursor)
        viewids = emby_db.getViews()

    # Get Ids with Theme Videos
    itemIds = {}
    for view in viewids:
        url = "{server}/emby/Users/{UserId}/Items?HasThemeVideo=True&ParentId=%s&format=json" % view
        result = doUtils.downloadUrl(url)
        if result['TotalRecordCount'] != 0:
            for item in result['Items']:
                itemId = item['Id']
                folderName = item['Name']
                folderName = utils.normalize_string(folderName.encode('utf-8'))
                itemIds[itemId] = folderName

    # Get paths for theme videos
    for itemId in itemIds:
        nfo_path = xbmc.translatePath(
            "special://profile/addon_data/emby.for.kodi/library/%s/" % itemIds[itemId])
        # Create folders for each content
        if not xbmcvfs.exists(nfo_path):
            xbmcvfs.mkdir(nfo_path)
        # Where to put the nfos
        nfo_path = "%s%s" % (nfo_path, "tvtunes.nfo")

        url = "{server}/emby/Items/%s/ThemeVideos?format=json" % itemId
        result = doUtils.downloadUrl(url)

        # Create nfo and write themes to it
        nfo_file = xbmcvfs.File(nfo_path, 'w')
        pathstowrite = ""
        # May be more than one theme
        for theme in result['Items']:
            putils = playutils.PlayUtils(theme)
            if playback == "DirectPlay":
                playurl = putils.directPlay()
            else:
                playurl = putils.directStream()
            pathstowrite += ('<file>%s</file>' % playurl.encode('utf-8'))

        # Check if the item has theme songs and add them
        url = "{server}/emby/Items/%s/ThemeSongs?format=json" % itemId
        result = doUtils.downloadUrl(url)

        # May be more than one theme
        for theme in result['Items']:
            if playback == "DirectPlay":
                playurl = api.API(theme).get_file_path()
            else:
                playurl = playutils.PlayUtils(theme).directStream()
            pathstowrite += ('<file>%s</file>' % playurl.encode('utf-8'))

        nfo_file.write(
            '<tvtunes>%s</tvtunes>' % pathstowrite
        )
        # Close nfo file
        nfo_file.close()

    # Get Ids with Theme songs
    musicitemIds = {}
    for view in viewids:
        url = "{server}/emby/Users/{UserId}/Items?HasThemeSong=True&ParentId=%s&format=json" % view
        result = doUtils.downloadUrl(url)
        if result['TotalRecordCount'] != 0:
            for item in result['Items']:
                itemId = item['Id']
                folderName = item['Name']
                folderName = utils.normalize_string(folderName.encode('utf-8'))
                musicitemIds[itemId] = folderName

    # Get paths
    for itemId in musicitemIds:
        # if the item was already processed with video themes back out
        if itemId in itemIds:
            continue

        nfo_path = xbmc.translatePath(
            "special://profile/addon_data/emby.for.kodi/library/%s/" % musicitemIds[itemId])
        # Create folders for each content
        if not xbmcvfs.exists(nfo_path):
            xbmcvfs.mkdir(nfo_path)
        # Where to put the nfos
        nfo_path = "%s%s" % (nfo_path, "tvtunes.nfo")

        url = "{server}/emby/Items/%s/ThemeSongs?format=json" % itemId
        result = doUtils.downloadUrl(url)

        # Create nfo and write themes to it
        nfo_file = xbmcvfs.File(nfo_path, 'w')
        pathstowrite = ""
        # May be more than one theme
        for theme in result['Items']:
            if playback == "DirectPlay":
                playurl = api.API(theme).get_file_path()
            else:
                playurl = playutils.PlayUtils(theme).directStream()
            pathstowrite += ('<file>%s</file>' % playurl.encode('utf-8'))

        nfo_file.write(
            '<tvtunes>%s</tvtunes>' % pathstowrite
        )
        # Close nfo file
        nfo_file.close()

    dialog.notification(
        heading=lang(29999),
        message=lang(33069),
        icon="special://home/addons/emby.for.kodi/icon.png",
        time=1000,
        sound=False)
def getThemeMedia():

    doUtils = downloadutils.DownloadUtils()
    dialog = xbmcgui.Dialog()
    playback = None

    # Choose playback method
    resp = dialog.select("Playback method for your themes",
                         ["Direct Play", "Direct Stream"])
    if resp == 0:
        playback = "DirectPlay"
    elif resp == 1:
        playback = "DirectStream"
    else:
        return

    library = xbmc.translatePath(
        "special://profile/addon_data/plugin.video.emby/library/").decode('utf-8')
    # Create library directory
    if not xbmcvfs.exists(library):
        xbmcvfs.mkdir(library)

    # Set custom path for user
    tvtunes_path = xbmc.translatePath(
        "special://profile/addon_data/script.tvtunes/").decode('utf-8')
    if xbmcvfs.exists(tvtunes_path):
        tvtunes = xbmcaddon.Addon(id="script.tvtunes")
        tvtunes.setSetting('custom_path_enable', "true")
        tvtunes.setSetting('custom_path', library)
        utils.logMsg("EMBY", "TV Tunes custom path is enabled and set.", 1)
    else:
        # if it does not exist this will not work so warn user
        # often they need to edit the settings first for it to be created.
        dialog.ok(
            heading="Warning",
            line1="The settings file does not exist in tvtunes. "
                  "Go to the tvtunes addon and change a setting, then come back and re-run.")
        xbmc.executebuiltin('Addon.OpenSettings(script.tvtunes)')
        return

    # Get every user view Id
    embyconn = utils.kodiSQL('emby')
    embycursor = embyconn.cursor()
    emby_db = embydb.Embydb_Functions(embycursor)
    viewids = emby_db.getViews()
    embycursor.close()

    # Get Ids with Theme Videos
    itemIds = {}
    for view in viewids:
        url = "{server}/emby/Users/{UserId}/Items?HasThemeVideo=True&ParentId=%s&format=json" % view
        result = doUtils.downloadUrl(url)
        if result['TotalRecordCount'] != 0:
            for item in result['Items']:
                itemId = item['Id']
                folderName = item['Name']
                folderName = utils.normalize_string(folderName.encode('utf-8'))
                itemIds[itemId] = folderName

    # Get paths for theme videos
    for itemId in itemIds:
        nfo_path = xbmc.translatePath(
            "special://profile/addon_data/plugin.video.emby/library/%s/" % itemIds[itemId])
        # Create folders for each content
        if not xbmcvfs.exists(nfo_path):
            xbmcvfs.mkdir(nfo_path)
        # Where to put the nfos
        nfo_path = "%s%s" % (nfo_path, "tvtunes.nfo")

        url = "{server}/emby/Items/%s/ThemeVideos?format=json" % itemId
        result = doUtils.downloadUrl(url)

        # Create nfo and write themes to it
        nfo_file = xbmcvfs.File(nfo_path, 'w')
        pathstowrite = ""
        # May be more than one theme
        for theme in result['Items']:
            putils = playutils.PlayUtils(theme)
            if playback == "DirectPlay":
                playurl = putils.directPlay()
            else:
                playurl = putils.directStream()
            pathstowrite += ('<file>%s</file>' % playurl.encode('utf-8'))

        # Check if the item has theme songs and add them
        url = "{server}/emby/Items/%s/ThemeSongs?format=json" % itemId
        result = doUtils.downloadUrl(url)

        # May be more than one theme
        for theme in result['Items']:
            putils = playutils.PlayUtils(theme)
            if playback == "DirectPlay":
                playurl = putils.directPlay()
            else:
                playurl = putils.directStream()
            pathstowrite += ('<file>%s</file>' % playurl.encode('utf-8'))

        nfo_file.write(
            '<tvtunes>%s</tvtunes>' % pathstowrite
        )
        # Close nfo file
        nfo_file.close()

    # Get Ids with Theme songs
    musicitemIds = {}
    for view in viewids:
        url = "{server}/emby/Users/{UserId}/Items?HasThemeSong=True&ParentId=%s&format=json" % view
        result = doUtils.downloadUrl(url)
        if result['TotalRecordCount'] != 0:
            for item in result['Items']:
                itemId = item['Id']
                folderName = item['Name']
                folderName = utils.normalize_string(folderName.encode('utf-8'))
                musicitemIds[itemId] = folderName

    # Get paths
    for itemId in musicitemIds:
        # if the item was already processed with video themes back out
        if itemId in itemIds:
            continue

        nfo_path = xbmc.translatePath(
            "special://profile/addon_data/plugin.video.emby/library/%s/" % musicitemIds[itemId])
        # Create folders for each content
        if not xbmcvfs.exists(nfo_path):
            xbmcvfs.mkdir(nfo_path)
        # Where to put the nfos
        nfo_path = "%s%s" % (nfo_path, "tvtunes.nfo")

        url = "{server}/emby/Items/%s/ThemeSongs?format=json" % itemId
        result = doUtils.downloadUrl(url)

        # Create nfo and write themes to it
        nfo_file = xbmcvfs.File(nfo_path, 'w')
        pathstowrite = ""
        # May be more than one theme
        for theme in result['Items']:
            putils = playutils.PlayUtils(theme)
            if playback == "DirectPlay":
                playurl = putils.directPlay()
            else:
                playurl = putils.directStream()
            pathstowrite += ('<file>%s</file>' % playurl.encode('utf-8'))

        nfo_file.write(
            '<tvtunes>%s</tvtunes>' % pathstowrite
        )
        # Close nfo file
        nfo_file.close()

    dialog.notification(
        heading="Emby for Kodi",
        message="Themes added!",
        icon="special://home/addons/plugin.video.emby/icon.png",
        time=1000,
        sound=False)
def _read(self, filename):
    with open(filename) as fp:
        content = fp.read()
    return normalize_string(content)
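# A minimal sketch of _read in use (hypothetical reader class and file name;
# assumes normalize_string is imported at module level as in the snippet above):
#
#   reader = CorpusReader()            # hypothetical class defining _read
#   text = reader._read('sample.txt')  # hypothetical file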