def updateChannel(self, schid, ownID=0, ownCID=0):
    if not ownID:
        (err, ownID) = ts3lib.getClientID(schid)
    if not ownCID:
        (err, ownCID) = ts3lib.getChannelOfClient(schid, ownID)
    (err, cname) = ts3lib.getChannelVariable(
        schid, ownCID, ts3defines.ChannelProperties.CHANNEL_NAME)
    name = re.sub(r'^\[[crl\*]spacer(.*)?\]', '', cname,
                  flags=re.IGNORECASE | re.UNICODE)
    from unidecode import unidecode
    self.activity["state"] = unidecode(name)
    clients = len(ts3lib.getChannelClientList(schid, ownCID)[1])
    # (err, clients) = ts3lib.getChannelVariable(schid, ts3defines.ChannelPropertiesRare.)
    (err, cmax) = ts3lib.getChannelVariable(
        schid, ownCID, ts3defines.ChannelProperties.CHANNEL_MAXCLIENTS)
    if cmax >= clients:
        if PluginHost.cfg.getboolean("general", "verbose"):
            print("cmax", cmax, ">=", "clients", clients)
        self.activity["party"]["size"] = [clients, cmax]
    else:
        (err, smax) = ts3lib.getServerVariable(
            schid, ts3defines.VirtualServerProperties.VIRTUALSERVER_MAXCLIENTS)
        # (err, clients) = ts3lib.getServerVariable(schid, ts3defines.VirtualServerProperties.VIRTUALSERVER_CLIENTS_ONLINE)
        # clients = len(ts3lib.getClientList(schid)[1])
        self.activity["party"] = {"size": [clients, smax]}
    (err, ip) = ts3lib.getConnectionVariable(
        schid, ownID, ts3defines.ConnectionProperties.CONNECTION_SERVER_IP)
    (err, port) = ts3lib.getConnectionVariable(
        schid, ownID, ts3defines.ConnectionProperties.CONNECTION_SERVER_PORT)
    self.activity["secrets"] = {
        "join": "ts3server://{}?port={}&cid={}".format(ip, port, ownCID)
    }
    self.update = True
def create_Historical_Receipt(description, event_date, fname, lname,
                              money_flow, email_tutor):
    global receipt_tutor_folder
    receipt_number = str(random_with_N_digits(8))
    receipt_name = receipt_number + "_" + unidecode(fname + lname) + ".pdf"
    receipt_path = receipt_tutor_folder + receipt_name
    receipt_description = "Description : " + description
    # Reformat the event date from YYYY-MM-DD to DD/MM/YYYY
    event_date = str(event_date)
    year, month, day = map(str, event_date.split('-'))
    event_date = day + "/" + month + "/" + year
    event_date = "Event date : " + str(event_date)
    name = "Tutor : " + fname + " " + lname
    system_timestamp = time.strftime('%d/%m/%Y %X')
    receipt_timestamp = "Date and time of transaction :" + " " + system_timestamp
    amount = "TUMi cash flow : €" + money_flow
    receipt_number = "Receipt number : " + receipt_number
    save_name = os.path.join(os.path.expanduser("~"), receipt_tutor_folder,
                             receipt_name)
    c = canvas.Canvas(save_name, pagesize=A4)
    # header text
    c.setFont(size=14, psfontname="Helvetica", leading=None)
    c.drawString(70, 670, "International Centre")
    c.drawString(70, 650, "Technical University of Munich")
    c.drawString(70, 630, "Arcisstr. 21, 80333 Munich")
    c.drawString(70, 610, "Germany")
    c.setFont(size=22, psfontname="Helvetica-Bold", leading=None)
    c.drawCentredString(x=300, y=570, text="RECEIPT")
    c.setFont(size=14, psfontname="Helvetica", leading=None)
    c.drawString(70, 510, receipt_description)
    c.drawString(70, 490, event_date)
    c.drawString(70, 450, name)
    c.drawString(70, 430, receipt_timestamp)
    c.drawString(70, 410, amount)
    c.drawString(70, 370, receipt_number)
    c.drawImage(image='index.png', x=295, y=700, preserveAspectRatio=False,
                width=229.2, height=79.2)
    c.showPage()
    c.save()
    sendReceipt(fname, lname, description, receipt_path, receipt_name,
                email_tutor, receipt_type=2)
def remove_spl_characters_from_file(src_filepath, dst_filepath):
    counter = 1
    if os.path.exists(dst_filepath):
        dst_path = dst_filepath
    else:
        os.mkdir(dst_filepath)  # os.mkdir returns None, so keep the path string itself
        dst_path = dst_filepath
    for filename in os.listdir(src_filepath):
        if os.path.isfile(src_filepath + filename):
            print(str(counter) + ":" + filename)
            counter = counter + 1
            ogfilepath = src_filepath + filename
            print("original file name: ", ogfilepath)
            convfilepath = dst_path + filename
            print("converted file name: ", convfilepath)
            originalFile = open(ogfilepath, "r", encoding="latin-1")
            convertFile = open(convfilepath, "w", encoding="ascii")
            for line in originalFile:
                line = unidecode(line)
                line = line.replace("\x1a", "")
                convertFile.write(line)
            originalFile.close()
            convertFile.close()  # the call parentheses were missing, so the file was never flushed
            print("**File writing process completed")
        else:
            print(filename + ": is not a file")
    print("***********End of process************")
    return (originalFile, convertFile)
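
# Usage sketch (hypothetical paths): both arguments are concatenated with
# file names directly, so they need trailing slashes.
remove_spl_characters_from_file("./input/", "./ascii_output/")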
def parse_xword(self, xword_data):
    xword_metadata = xword_data.get('copy', '')
    xword_data = xword_data.get('grid', '')
    date_string = xword_metadata.get('date-publish-analytics').split()[0]
    self.date = datetime.datetime.strptime(date_string, '%Y/%m/%d')
    fetched = {}
    for field in ['title', 'byline', 'publisher']:
        fetched[field] = html2text(xword_metadata.get(field, ''),
                                   bodywidth=0).strip()
    puzzle = puz.Puzzle()
    puzzle.title = fetched.get('title')
    puzzle.author = fetched.get('byline')
    puzzle.copyright = fetched.get('publisher')
    puzzle.width = int(xword_metadata.get('gridsize').get('cols'))
    puzzle.height = int(xword_metadata.get('gridsize').get('rows'))
    solution = ''
    fill = ''
    markup = b''
    for row in xword_data:
        for cell in row:
            if not cell['Letter']:
                fill += '.'
                solution += '.'
                markup += b'\x00'
            else:
                fill += '-'
                solution += cell['Letter']
                markup += (b'\x80' if (cell.get('style', '') and
                                       cell['style']['shapebg'] == 'circle')
                           else b'\x00')
    puzzle.fill = fill
    puzzle.solution = solution
    clue_list = xword_metadata['clues'][0]['clues'] + \
        xword_metadata['clues'][1]['clues']
    sorted_clue_list = sorted(clue_list, key=lambda x: int(x['number']))
    clues = [clue['clue'] for clue in sorted_clue_list]
    normalized_clues = [html2text(unidecode(clue), bodywidth=0)
                        for clue in clues]
    puzzle.clues = normalized_clues
    has_markup = b'\x80' in markup
    if has_markup:
        puzzle.extensions[b'GEXT'] = markup
        puzzle._extensions_order.append(b'GEXT')
        puzzle.markup()
    return puzzle
def get_valid_name(name):
    name = str(name).strip()  # Strip start/end whitespace
    name = unidecode(name)  # Convert to ASCII, e.g. é becomes e
    name = re.sub(r'[:\\\/]', '-', name)  # Convert : \ and / to -
    # Remove everything else that's not a standard character
    name = re.sub(r'(?u)[^-\w. ]', '', name)
    return name
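
# Usage sketch for get_valid_name (illustrative inputs, not from the
# original source):
print(get_valid_name("  Résumé: draft/v2.txt "))  # -> "Resume- draft-v2.txt"
print(get_valid_name("report<2021>.pdf"))         # -> "report2021.pdf"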
def save(self, *args, **kwargs):
    post = super().save(commit=False)
    strtime = "".join(str(time.time()).split("."))
    slug = unidecode(post.title)
    string = "%s-%s" % (slug, strtime[7:])
    # Assign the slug to the model instance (not the form) so it is persisted
    post.slug = slugify(string, allow_unicode=True)
    post.save()
    return post
def remplace_accents(tweet):
    """Strip accents from the tweet."""
    try:
        s1 = unidecode(tweet)
        s2 = unicodedata.normalize('NFD', s1).encode('ascii', 'ignore')
        return str(s2, 'utf-8')
    except Exception:
        return "error"
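
# Usage sketch (illustrative tweet text):
print(remplace_accents("Félicitations à l'équipe !"))  # -> "Felicitations a l'equipe !"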
def prepare(self, obj):
    original = super(TranslatedTextField, self).prepare(obj)
    if original is None:
        return None
    with translation.override(self.language_code):
        # Coerce to str and drop newlines before translating
        translated = _(str(original).replace('\n', '').replace('\r', ''))
    if not self.unidecode:
        return translated
    from unidecode import unidecode
    return unidecode(translated)
def save(self, *args, **kwargs):
    self.slug = slugify(unidecode(self.title))
    if not self.body and self.md_file:
        self.body = self.md_file.read()
    html = markdown2.markdown(self.body,
                              extras=["fenced-code-blocks", "tables"])
    self.html_file.save(self.title + '.html',
                        ContentFile(html.encode('utf-8')), save=False)
    self.html_file.close()
    super().save(*args, **kwargs)
def playlist_artists(user_in, ID_in):
    """ USE SPOTIPY TO GET TRACKS FROM SPOTIFY PLAYLISTS """
    raw_dat = spotify.user_playlist_tracks(user=user_in, playlist_id=ID_in)
    song_list = raw_dat['items']
    artist_df = pd.DataFrame()
    for song in song_list:
        try:
            artists = song['track']['artists']
            for artist in artists:
                artist_name = unidecode(
                    str((artist['name'].encode('utf-8')), encoding="utf-8"))
                artist_ID = str(artist['uri']).replace('spotify:artist:', '')
                artist_dat = spotify.artist(artist_id=artist_ID)
                artist_followers = artist_dat['followers']['total']
                artist_popularity = artist_dat['popularity']
                artist_array = pd.DataFrame(
                    [[artist_name, artist_ID, artist_followers,
                      artist_popularity]],
                    columns=['artist_name', 'artist_ID',
                             'artist_followers', 'artist_popularity'])
                artist_df = artist_df.append(artist_array)
        except TypeError as Err:
            artist_name = ' '
            artist_ID = ' '
            artist_followers = ' '
            artist_popularity = ' '
            artist_array = pd.DataFrame(
                [[artist_name, artist_ID, artist_followers,
                  artist_popularity]],
                columns=['artist_name', 'artist_ID',
                         'artist_followers', 'artist_popularity'])
            artist_df = artist_df.append(artist_array)
    return artist_df
def transliterate(string):
    """Transliterate a string into its closest ASCII representation.

    Ex: 1. àé => ae, 2. สวัสดีครับ => swasdiikhrab.

    :param string: string
    :return: closest string.
    """
    if not isinstance(string, bytes):
        string = u''.join(string)
    return unidecode(string)
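
# Usage sketch, exercising the docstring's own examples:
print(transliterate(u"àé"))          # -> "ae"
print(transliterate(u"สวัสดีครับ"))  # -> "swasdiikhrab" (per the docstring)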
def leitura_stopwords(caminho):
    stopwords = []
    stop = open(caminho, "r", encoding='utf-8')
    stopwords = stop.readlines()
    stop.close()
    for i in range(len(stopwords)):
        stopwords[i] = stopwords[i].lower()
    for i in range(len(stopwords)):
        stopwords[i] = unidecode(stopwords[i].replace("\n", ""))
    stopwords = sorted(stopwords)
    return stopwords
def _clean_text(text: str):
    """Normalize a sentence: convert dollars, pounds, numbers, decimal
    points, ordinals, etc. into words, and expand common abbreviations.

    :param text: a single sentence
    :return: the cleaned text sequence
    """
    text = unidecode(text)
    text = text.lower()
    text = _clean_number(text=text)
    text = _abbreviations_to_word(text=text)
    text = re.sub(r"\s+", " ", text)
    return text
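
# Usage sketch; the exact output depends on the _clean_number and
# _abbreviations_to_word helpers defined elsewhere in this module
# (illustrative input and output):
#   _clean_text("Dr. Müller paid $5.")  # e.g. "doctor muller paid five dollars."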
def process(self, element, int_lst):
    try:
        from unidecode import unidecode
        d = {str(k): unidecode(str(v)) for (k, v) in element.items()}
        # Cast the integer columns back; iterate over d (not element) so the
        # unidecode pass above is not discarded
        d = {k: (int(v) if k in int_lst else v) for (k, v) in d.items()}
        logging.info('{0}: {1}'.format(self.__class__.__name__, element))
        yield d
    except Exception as e:
        element['error_msg'] = e
        element['ptransform'] = self.__class__.__name__
        element['error_tag'] = True
        element['script'] = os.path.basename(sys.argv[0])
        logging.info('Error received at {0}: {1}'.format(
            self.__class__.__name__, element))
        yield element
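
# Hedged wiring sketch (assumed pipeline context, not from the original
# source): extra positional/keyword arguments to beam.ParDo are forwarded
# to process(), which is how int_lst would be supplied.
#   cleaned = rows | beam.ParDo(CleanRowFn(), int_lst=['age', 'count'])
# CleanRowFn is a hypothetical name for the DoFn subclass defined above.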
def handleFinalPage(r, url):
    global finalPagesReached
    global currentList
    finalPagesReached += 1
    soup = bs(r)
    # Get image description
    logo_div = soup.find_all('div', attrs={'class': 'mainLogo'})[0]
    logo = logo_div.find_all('img')[0]
    logo_desc = logo['title']
    logo_url = logo['src']
    # Compile attrs
    attrs = [currentList, url, logo_desc, logo_url]
    # Logging
    print('\033[92m' + str(finalPagesReached) + ': \033[0m' + logo_desc)
    with open('sports.csv', 'a') as out:
        wr = csv.writer(out, quoting=csv.QUOTE_ALL)
        wr.writerow([unidecode(attr) for attr in attrs])
def labelItem(item, il, title):
    def labelAs(a):  # function for setting labels
        item.editLabels(
            labels={'en': a},
            summary=(u'set [en] label to "' + a +
                     '" based on automated romanization of Ukrainian label'))
    try:
        ukval = il["uk"]
        ukroman = unidecode(ukval)
        # rm 's because the Ukrainian Nat'l Syst. doesn't use them anymore
        ukroman_fixed = ukroman.replace("'", "")
        global total
        if total < max:
            total += 1
            labelAs(ukroman_fixed)
            log("updates", title, "label")
            print("updated " + title + " label (#" + str(total) + ")")
        else:
            sys.exit("total reached: " + str(max))
    except pywikibot.exceptions.OtherPageSaveError:
        log("dupeErrors", title, "")
        print("dupeError: " + title)
def generate_dictionary_for_each_language(self, number_of_words_per_lang=50000):
    """
    Parameters
    ----------
    number_of_words_per_lang : int, optional
        The minimum number of words from each language that should be
        included during training.

    Returns
    -------
    lst : list
        A list of all the words of each language.
    output_class : list
        A list of the language labels for each word in "lst"
    """
    self.all_words_list = []
    self.output_class = []
    for language in self.languages:
        wiki.set_lang(language)
        lst = []
        #while len(list_of_acceptable_topics) < 100:
        while len(list(set(lst))) < number_of_words_per_lang:
            try:
                topic = wiki.random()
                page = wiki.page(topic)
                content = page.content
                content = unidecode(content)
                lst = lst + self.clean(content)
            except wiki.DisambiguationError:
                # skip if there is more than one Wikipedia page with that topic
                pass
            except wiki.PageError:
                # skip if page can't be accessed for some reason
                pass
        self.all_words_list = self.all_words_list + lst
        for i in lst:
            self.output_class.append(self.lang_dict[language])
    return self.all_words_list, self.output_class
def updateServer(self, schid, ownID=0):
    from unidecode import unidecode
    if not ownID:
        (err, ownID) = ts3lib.getClientID(schid)
    (err, name) = ts3lib.getServerVariable(
        schid, ts3defines.VirtualServerProperties.VIRTUALSERVER_NAME)
    self.activity["details"] = unidecode(name)
    server_type = getServerType(schid)
    if server_type == ServerInstanceType.TEASPEAK:
        self.activity["assets"]["large_text"] = "TeaSpeak"
        self.activity["assets"]["large_image"] = "teaspeak"
    elif server_type == ServerInstanceType.VANILLA:
        self.activity["assets"]["large_text"] = "TeamSpeak 3"
        self.activity["assets"]["large_image"] = "teamspeak"
    elif server_type == ServerInstanceType.SDK:
        self.activity["assets"]["large_text"] = "TeamSpeak 3 SDK"
        self.activity["assets"]["large_image"] = "teamspeak"
    else:
        self.activity["assets"]["large_text"] = "Unknown"
        self.activity["assets"]["large_image"] = "broken_image"
    self.update = True
def leitura_cancao(caminho):
    # Read the "Canção dos Tamanquinhos" document
    novo_conjunto = []
    novo = []
    vetor_palavras = []
    # Split the lines into words so everything is not on a single line,
    # removing unneeded periods and commas
    with open(caminho, encoding='utf-8') as file:
        for line in file:
            line = line.replace(",", "")
            representation = line.split(' ')
            for word in representation:
                x = word.encode('utf-8')
                x = x.replace(b'\xe2\x80\xa6', b'')  # drop the "…" ellipsis character
                word = x.decode('utf-8')
                novo.append(word)
    novo_conjunto.extend(novo)
    final = novo_conjunto
    vetor_palavras = final
    for i in range(len(vetor_palavras)):
        vetor_palavras[i] = vetor_palavras[i].replace("\n", "")
    for i in range(len(vetor_palavras)):
        vetor_palavras[i] = vetor_palavras[i].replace("?", "")
    for i in range(len(vetor_palavras)):
        vetor_palavras[i] = vetor_palavras[i].replace("!", "")
    for i in range(len(vetor_palavras)):
        vetor_palavras[i] = vetor_palavras[i].replace(".", "")
    for i in range(len(vetor_palavras)):
        vetor_palavras[i] = vetor_palavras[i].replace(":", "")
    for i in range(len(vetor_palavras)):
        vetor_palavras[i] = unidecode(vetor_palavras[i].lower())
    vetor_palavras = sorted(vetor_palavras)
    return vetor_palavras
import xlrd
import mysql.connector
from unidecode import unidecode

file = 'files/contactos.xls'
workbook = xlrd.open_workbook(file, encoding_override="utf-8")  # Open the Excel file
sheet = workbook.sheet_by_index(0)  # Choose the sheet to work with
print(sheet.cell_value(0, 2))  # row index and column index of the value
#print(sheet.nrows)  # number of rows
#print(sheet.ncols)  # number of columns

# Database connection
conn = mysql.connector.connect(user='******', password='******',
                               host='localhost', database='contactos')
# Create the cursor
myCursor = conn.cursor()
for row in range(sheet.nrows):
    atencion = sheet.cell_value(row, 0).encode('ascii', 'ignore').decode('ascii')
    proveedor = sheet.cell_value(row, 1).encode('ascii', 'ignore').decode('ascii')
    contacto = sheet.cell_value(row, 2).encode('ascii', 'ignore').decode('ascii')
    descripcion = sheet.cell_value(row, 3).encode('ascii', 'ignore').decode('ascii')
    #print(sheet.cell_value(row, 4).encode('ascii', 'ignore').decode('ascii'))
    print(unidecode(sheet.cell_value(row, 4).strip()))
def save(self, *args, **kwargs):
    self.name_slug = slugify(unidecode(self.name))
    super(User, self).save(*args, **kwargs)
import re
import urllib.request

import pandas as pd
import spotipy
from bs4 import BeautifulSoup
from unidecode import unidecode

spotify = spotipy.Spotify()
results = spotify.search(q="artist:drake", type="artist")
print(results)
quote_page = 'http://metrolyrics.com/{}-lyrics-drake.html'
filename = 'drake-songs.csv'
songs = pd.read_csv(filename)
for index, row in songs.iterrows():
    page = urllib.request.urlopen(quote_page.format(row['song']))
    soup = BeautifulSoup(page, 'html.parser')
    verses = soup.find_all('p', attrs={'class': 'verse'})
    lyrics = ''
    for verse in verses:
        text = verse.text.strip()
        text = re.sub(r"\[.*\]\n", "", unidecode(text))
        if lyrics == '':
            lyrics = lyrics + text.replace('\n', '|-|')
        else:
            lyrics = lyrics + '|-|' + text.replace('\n', '|-|')
    songs.at[index, 'lyrics'] = lyrics
    print('saving {}'.format(row['song']))
songs.head()
print('writing to .csv')
songs.to_csv(filename, sep=',', encoding='utf-8')
def save(self, *args, **kwargs):
    self.slug = slugify(unidecode(self.title))
    super(Post, self).save(*args, **kwargs)
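
# The slugify(unidecode(...)) combination above transliterates before
# slugifying, so non-ASCII titles still yield usable slugs
# (illustrative values):
#   slugify(unidecode("Привет мир"))  # -> "privet-mir"
#   slugify("Привет мир")             # -> "" (Django's ASCII slugify drops it all)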
def _parse_card(card_json, expansion):
    try:
        db_card = card_funcs.find_card_by_name(card_json['name'])
    except card_funcs.CardNotFoundError:
        db_card = models.MagicCard()
        db_card.name = card_json['name']
        #FIXME: This line breaks everything.
        db_card.search_name = unidecode(
            card_json['name'].lower().replace(u"'", u"").replace(u",", u""))
        if 'cmc' in card_json:
            db_card.converted_mana_cost = card_json['cmc']
        if 'manaCost' in card_json:
            db_card.mana_cost = card_json['manaCost']
        if 'text' in card_json:
            db_card.rules_text = card_json['text'].replace(u"\n", u" | ")
        if 'power' in card_json:
            db_card.power = card_json['power']
        if 'toughness' in card_json:
            db_card.toughness = card_json['toughness']
        if 'loyalty' in card_json:
            db_card.loyalty = card_json['loyalty']
        if 'layout' in card_json:
            db_layout = models.Layout.get_by(layout=card_json['layout'])
            if not db_layout:
                db_layout = models.Layout(layout=card_json['layout'])
            db_card.layout = db_layout
        if 'colors' in card_json:
            for card_color in card_json['colors']:
                db_color = models.Color.get_by(color=card_color)
                if not db_color:
                    db_color = models.Color(color=card_color)
                db_card.colors.append(db_color)
        if 'supertypes' in card_json:
            for card_supertype in card_json['supertypes']:
                db_supertype = models.SuperType.get_by(supertype=card_supertype)
                if not db_supertype:
                    db_supertype = models.SuperType(supertype=card_supertype)
                db_card.supertypes.append(db_supertype)
        if 'types' in card_json:
            for card_type in card_json['types']:
                db_type = models.CardType.get_by(magictype=card_type)
                if not db_type:
                    db_type = models.CardType(magictype=card_type)
                db_card.card_types.append(db_type)
        if 'subtypes' in card_json:
            for card_subtype in card_json['subtypes']:
                db_subtype = models.SubType.get_by(subtype=card_subtype)
                if not db_subtype:
                    db_subtype = models.SubType(subtype=card_subtype)
                db_card.subtypes.append(db_subtype)
        if 'rulings' in card_json:
            for card_ruling in card_json['rulings']:
                models.Ruling(
                    date=datetime.datetime.strptime(card_ruling['date'],
                                                    "%Y-%m-%d").date(),
                    text=card_ruling['text'], card=db_card)
        if 'legalities' in card_json:
            for format_pair in card_json['legalities']:
                db_format = models.Format.get_by(format_name=format_pair['format'])
                if db_format is not None:
                    models.Legality(card=db_card, format_name=db_format,
                                    legality=format_pair['legality'])
    finally:
        try:
            db_cardrelease = models.CardRelease()
            db_cardrelease.expansion = expansion
            db_cardrelease.card = db_card
            if 'originalText' in card_json:
                if expansion.name == card_json['printings'][0]:
                    db_card.printed_text = card_json['originalText'].replace(
                        u"\n", u" | ")
            if 'rarity' in card_json:
                db_rarity = models.Rarity.get_by(rarity=card_json['rarity'])
                if not db_rarity:
                    db_rarity = models.Rarity(rarity=card_json['rarity'],
                                              abbreviation=card_json['rarity'][0])
                db_cardrelease.rarity = db_rarity
            if 'multiverseid' in card_json:
                db_cardrelease.multiverse_id = card_json['multiverseid']
            if 'flavor' in card_json:
                db_cardrelease.flavor_text = card_json['flavor']
            db_cardrelease.mtgoprice = models.MTGOPrice()
        except Exception as e:
            print(e)
            print(card_json)
def updateClient(self, schid):
    (err, name) = ts3lib.getClientSelfVariable(
        schid, ts3defines.ClientProperties.CLIENT_NICKNAME)
    from unidecode import unidecode
    self.activity["assets"]["large_text"] = unidecode(name)
def labelItem(page, title, item, il, ia):
    def setData(a, b, c, d):
        global total
        if total < max:
            old_ec = page.revision_count()
            item.editEntity(a, summary=b)
            page.purge()
            new_ec = page.revision_count()
            if not new_ec == old_ec:
                total += 1
                if c == "dab":
                    print("Updated " + title + " labels and aliases (dab): " +
                          d + " (#" + str(total) + ")")
                    log("updates", title, "labels and aliases (dab)")
                elif c == "unanimity":
                    log("updates", title, "labels and aliases (unanimity)")
                    print("Updated " + title + " labels and aliases (unanimity): " +
                          d + " (#" + str(total) + ")")
            else:
                if c == "dab":
                    print(title + " labels and aliases not changed (dab)")
                elif c == "unanimity":
                    print(title + " labels and aliases not changed (unanimity)")
        else:
            sys.exit("max number of edits reached")

    try:
        lda1 = '{"labels": {'
        lda2 = '"aliases": {'
        for lang in il:
            if re.search(",| \(", il[lang]):
                ilr = re.sub("(,| \().*", "", il[lang])
                lda1 = lda1 + '"' + lang + '": "' + ilr + '", '
                lda2 = (lda2 + '"' + lang + '": ["' + il[lang] + '", "' +
                        unidecode(il[lang]) + '"], ')
        lda3 = lda1.rstrip(", ") + "}, "  # create dict of lang values and labels
        lda4 = lda2.rstrip(", ") + "}}"
        lda5 = lda3 + lda4
        setData(json.loads(lda5),
                u"([[WD:Requests_for_permissions/Bot/AmpersandBot_2|TRIAL RUN]]: block if malfunctioning) Removed disambiguation from labels & set old labels as aliases",
                "dab", lda3)
        # list of all WD-supported langs using Latin script
        latlangs = ("en", "to", "tet", "tum", "tn", "tpi", "wa", "wo", "war", "yo", "ts", "st",
                    "io", "bi", "kbp", "prg", "kri", "ie", "kab", "gn", "ia", "jam", "hil", "kr",
                    "lij", "pam", "lad", "ltg", "ln", "mh", "jbo", "ku", "mi", "pih", "olo", "nov",
                    "tw", "vo", "ang", "din", "ha", "atj", "pdc", "pfl", "nb", "nn", "fo", "kw",
                    "kl", "nds", "stq", "fy", "ik", "ace", "aa", "ak", "frr", "rup", "ast", "bar",
                    "ext", "ee", "szl", "ksh", "ve", "vec", "vep", "vro", "eo", "fr", "de", "vot",
                    "cho", "ceb", "co", "bjn", "map-bms", "bm", "ch", "chy", "ny", "cbk-zam", "fj",
                    "hz", "fur", "ff", "gag", "ht", "kaa", "rw", "ig", "rn", "ki", "ho", "csb",
                    "liv", "kg", "krj", "arn", "lmo", "nap", "pap", "om", "pag", "nso", "se", "sg",
                    "sat", "scn", "sc", "srn", "jv", "sn", "sm", "ss", "tl", "li", "gsw", "sgs",
                    "pcd", "nds-nl", "mus", "eml", "de-at", "vmf", "zea", "aln", "lfn", "roa-tara",
                    "su", "tay", "jut", "pdt", "gor", "sr-el", "simple", "crh-latn", "tt-latn",
                    "is", "la", "fi", "ay", "qu", "ca", "nl", "sw", "frc", "gcr", "ro", "mg",
                    "en-ca", "en-gb", "eu", "cs", "sv", "da", "id", "sq", "lb", "sk", "sl", "et",
                    "lv", "lt", "ga", "mt", "an", "ms", "tk", "az", "gl", "cy", "gd", "zu", "br",
                    "gv", "rm", "rmy", "xh", "za", "hsb", "so", "dsb", "sma", "nah", "na", "nv",
                    "oc", "af", "sco", "frp", "pms", "nrm", "mwl", "min", "ruq")
        il_list = []
        for lang in latlangs:
            if lang in il:
                il_list.append(il[lang])
        split = "no"
        if len(il_list) > 1:
            try:
                a = 1
                while a:
                    if il_list[0] == il_list[a]:
                        a += 1
                    else:
                        split = "yes"
                        break
            except IndexError:
                pass
            if split == "no":
                newLabel = il_list[0]
                ldb1 = '{"labels": {'
                for lang in latlangs:
                    if not lang in il:
                        ldb1 = ldb1 + '"' + lang + '": "' + newLabel + '", '
                ldb2 = ldb1.rstrip(", ") + "}"
                if not unidecode(newLabel) == newLabel:
                    # old aliases so they don't get deleted
                    ias = str(ia).strip("{").rstrip("}").replace("'", '"')
                    # another dict, but this time add aliases too
                    ldb3 = ldb2 + ', "aliases": {' + ias + ", "
                    for lang in latlangs:
                        ldb3 = ldb3 + '"' + lang + '": ["' + unidecode(newLabel) + '"], '
                    ldb4 = ldb3.rstrip(", ") + "}}"
                else:
                    ldb4 = ldb2 + "}"
                setData(json.loads(ldb4),
                        u"([[WD:Requests_for_permissions/Bot/AmpersandBot_2|TRIAL RUN]]: block if malfunctioning) Set all Latin-script languages' labels to match ones already used",
                        "unanimity", newLabel)
            elif split == "yes":
                log("LatSplit", title, "")
                print(title + " Latin-script labels not unanimous")
    except pywikibot.exceptions.OtherPageSaveError:
        log("APIErrors", title, "while labeling/aliasing")
        print("APIError: " + title)
def create_Receipt(event_description, event_code, event_date, fname, lname,
                   price_perpax, email_student, waiting_list_jein):
    # For temp receipts, we have a subfolder for each event.
    temp_folder = receipt_temp_folder + event_code
    perm_folder = receipt_perm_folder + event_code
    # Create receipt folders for each event if not done yet
    if not os.path.exists(temp_folder):
        os.makedirs(temp_folder)
    if not os.path.exists(perm_folder):
        os.makedirs(perm_folder)
    # Contents of receipt
    receipt_number = str(random_with_N_digits(8))
    if waiting_list_jein == False:
        # For those in the regular list
        receipt_name = "Register_" + receipt_number + "_" + unidecode(fname + lname) + ".pdf"
        receipt_description = "Event name : " + event_description
    else:
        # For those in the waiting list
        receipt_name = "WL_" + receipt_number + "_" + unidecode(fname + lname) + ".pdf"
        receipt_description = "Event name : " + event_description + " (Waiting List)"
    receipt_event_code = "Event code : " + event_code
    receipt_event_date = str(event_date)
    year, month, day = map(str, receipt_event_date.split('-'))
    event_date = day + "/" + month + "/" + year
    receipt_event_date = "Event date : " + str(event_date)
    receipt_participant_name = "Participant : " + fname + " " + lname
    system_timestamp = time.strftime('%d/%m/%Y %X')
    receipt_timestamp = "Date and time of payment :" + " " + system_timestamp
    receipt_amount = "Amount paid : €" + price_perpax
    receipt_number = "Receipt number : " + receipt_number
    save_name = os.path.join(os.path.expanduser("~"), temp_folder, receipt_name)
    c = canvas.Canvas(save_name, pagesize=A4)
    receipt_temp_path = temp_folder + "/" + receipt_name
    receipt_perm_path = perm_folder + "/" + receipt_name
    # header text
    c.setFont(size=14, psfontname="Helvetica", leading=None)
    c.drawString(70, 670, "International Centre")
    c.drawString(70, 650, "Technical University of Munich")
    c.drawString(70, 630, "Arcisstr. 21, 80333 Munich")
    c.drawString(70, 610, "Germany")
    c.setFont(size=22, psfontname="Helvetica-Bold", leading=None)
    c.drawCentredString(x=300, y=570, text="RECEIPT")
    c.setFont(size=14, psfontname="Helvetica", leading=None)
    c.drawString(70, 530, receipt_description)
    c.drawString(70, 510, receipt_event_code)
    c.drawString(70, 490, receipt_event_date)
    c.drawString(70, 450, receipt_participant_name)
    c.drawString(70, 430, receipt_timestamp)
    c.drawString(70, 410, receipt_amount)
    c.drawString(70, 370, receipt_number)
    c.drawString(70, 310, "*Please bring this receipt to deregister from the event.")
    c.drawString(70, 290, "**Deregistration deadline: 7 days before the event.")
    c.drawImage(image='index.png', x=295, y=700, preserveAspectRatio=False,
                width=229.2, height=79.2)
    c.showPage()
    c.save()
    # Function created to easily turn the send-receipt feature on / off
    send_Receipt(fname, lname, event_description, receipt_temp_path,
                 receipt_name, email_student, receipt_perm_path,
                 waiting_list_jein)
def eventbrite_event_pull():
    """
    MAIN API FUNCTION
    Get top 250 artists from the SQL table with relevant artists (they
    actually have upcoming events on stubhub)
    Pull pickled JSON file from S3, turn into Pandas DF
    Loop through these artists, making a request to the Eventbrite API for
    each encoded artist string
    Only keep records where the event name has an adequate fuzzy match score
    to the artist name
    Format items in API JSON response
    Insert into MYSQL, DynamoDB (NoSQL), and create local Pandas DF within loop
    Append local DF to pandas DF from S3, overwrite in s3
    """
    """GET ARTISTS DATAFRAME"""
    artists_df = data_fetch_pymysql().head(10)

    """CURRENT DATE ASSIGNMENT"""
    current_date = datetime.now()

    """DEFINE DYNAMODB ENDPOINT"""
    dynamodb = boto3.resource('dynamodb')
    dynamotable = dynamodb.Table('EventBrite_Event_Table')

    """PULL BACK ALL EVENTBRITE RECORDS FROM S3 BUCKET, FOR APPENDING LATER"""
    s3_client = boto3.client('s3')
    try:
        bucket = 'willjeventdata'
        key = 'eventbrite_events.pkl'
        key_temp = 'eventbrite/temp data/eventbrite_temp.json'
        key_json = 'eventbrite/main data/eventbrite_events.json'
        response = s3_client.get_object(Bucket=bucket, Key=key)
        event_dict = (response['Body'].read())
        event_json = json.loads(event_dict.decode('utf8'))
        # master_event_df = pd.DataFrame.from_dict(event_json)
        print('The S3 JSON list started with ' + str(len(event_json)) + ' records')
        temp_df = pd.DataFrame()
        for artist_dat in artists_df.iterrows():
            spotify_artist = artist_dat[1]['artist']
            spotify_artist_id = artist_dat[1]['artist_id']
            artist_encode = (spotify_artist.replace("&", " ")).replace(" ", "%20")
            artist_url = (base_string +
                          "expand=ticket_availability,external_ticketing,venue&" +
                          "q=" + artist_encode)
            print(artist_url)
            try:
                rawdat = urllib.request.urlopen(artist_url)
                encoded_dat = rawdat.read().decode('utf-8', errors='ignore')
                json_dat = json.loads(encoded_dat)
                events = json_dat['events']
                for event in events:
                    try:
                        event_name = ((event['name']['text']).replace('"', '')).encode('utf-8')
                        name_decode = unidecode(
                            str(event_name, encoding="utf-8")).replace('"', '')
                        Spotify_name = spotify_artist
                        EventBrite_name = event_name
                        fuzz_partial = fuzz.partial_ratio(
                            Spotify_name.lower(), EventBrite_name.lower())
                        fuzz_ratio = fuzz.ratio(Spotify_name.lower(),
                                                EventBrite_name.lower())
                        if (fuzz_ratio + fuzz_partial) > 150:
                            # print(event_name)
                            # print(fuzz_partial)
                            # print(fuzz_ratio)
                            event_id = event['id']
                            event_venue = event['venue']['name']
                            event_city = event['venue']['address']['city']
                            event_state = event['venue']['address']['region']
                            event_date_UTC = event['start']['utc']
                            lowest_price = event['ticket_availability'][
                                'minimum_ticket_price']['major_value']
                            highest_price = event['ticket_availability'][
                                'maximum_ticket_price']['major_value']
                            capacity = event['venue']['capacity']
                            sold_out_indicator = event['ticket_availability'][
                                'is_sold_out']
                            shareable = event['shareable']
                            available_elsewhere = event['is_externally_ticketed']

                            """MYSQL INSERTION"""
                            insert_tuple = (spotify_artist, spotify_artist_id,
                                            event_name, event_id, event_venue,
                                            event_city, event_state,
                                            event_date_UTC, lowest_price,
                                            highest_price, capacity,
                                            sold_out_indicator, shareable,
                                            available_elsewhere, current_date)
                            # print(insert_tuple)
                            event_QL = ('INSERT INTO `EVENTBRITE_EVENTS` (`artist`, `artist_id`, '
                                        '`name`, `id`, `venue`, `city`, `state`, `date_UTC`, '
                                        '`lowest_price`, `highest_price`, `capacity`, `sold_out`, '
                                        '`shareable`, `available_elsewhere`, `create_ts`) VALUES '
                                        '(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)')
                            # print(event_QL)
                            connection = pymysql.connect(
                                host='ticketsdb.cxrz9l1i58ux.us-west-2.rds.amazonaws.com',
                                user='******',
                                password='******',
                                db='tickets_db')
                            cursor = connection.cursor()
                            cursor.execute(event_QL, insert_tuple)
                            connection.commit()

                            """DYNAMODB INSERTION"""
                            event_key = (name_decode + str(event_id) +
                                         event_venue + event_city + event_state +
                                         str(event_date_UTC) + str(current_date))
                            # print(event_key)
                            dynamotable.put_item(
                                Item={
                                    'Event_ID': event_key,
                                    'name': event_name,
                                    'artist': spotify_artist,
                                    'city': event_city,
                                    'date_UTC': str(event_date_UTC),
                                    'state': event_state,
                                    'venue': event_venue,
                                    'capacity': capacity,
                                    'create_ts': str(current_date),
                                    'lowest_price': lowest_price,
                                    'highest_price': highest_price,
                                    'sold_out': sold_out_indicator,
                                    'shareable': shareable,
                                    'available_elsewhere': available_elsewhere
                                })

                            """S3 NEW DATA CREATION"""
                            event_array = pd.DataFrame(
                                [[spotify_artist, spotify_artist_id, event_name,
                                  event_id, event_venue, event_city, event_state,
                                  event_date_UTC, lowest_price, highest_price,
                                  capacity, sold_out_indicator, shareable,
                                  available_elsewhere, current_date]],
                                columns=[
                                    'artist', 'artist_id', 'name', 'event_id',
                                    'venue', 'city', 'state', 'date_UTC',
                                    'lowest_price', 'highest_price', 'capacity',
                                    'sold_out', 'shareable',
                                    'available_elsewhere', 'create_ts'
                                ])
                            temp_df = temp_df.append(event_array,
                                                     ignore_index=True,
                                                     sort=True)
                    except TypeError as no_data:
                        print('One of the fields was missing')
            except urllib.error.HTTPError:
                print('Bad Request')

        """APPEND LOCAL DF TO MASTER DF PULLED FROM S3"""
        # master_event_df = master_event_df.append(temp_df, sort=True)
        # print('The S3 JSON list now has ' + str(len(master_event_df)) + ' records')

        """DICT APPEND METHOD"""
        """S3 RESOURCE"""
        s3_resource = boto3.resource('s3')

        """MAKE DICT FROM TEMP DATAFRAME"""
        temp_dict = temp_df.to_dict('records')

        """MERGE TEMP DICT AND MASTER DICT"""
        appended_dict = event_json + temp_dict
        print('The S3 JSON list now has ' + str(len(appended_dict)) + ' records')

        """S3 FROM TEMP DICT"""
        temp_dict_stg = json.dumps(temp_dict, default=myconverter)
        temp_dict_json = temp_dict_stg.replace('[{', '{').replace(
            ']}', '}').replace('},', '}\n')
        # s3_resource.Object(bucket, key_temp).put(Body=temp_dict_stg)
        s3_resource.Object(bucket, key_temp).put(Body=temp_dict_json)

        """S3 PKL FROM APPENDED DICT"""
        appended_dict_stg = json.dumps(appended_dict, default=myconverter)
        # s3_resource.Object(bucket, key).put(Body=appended_dict_stg)
        s3_resource.Object(bucket, key).put(Body=appended_dict_stg)
        print('successfully overwrote the PKL file which now has ' +
              str(len(appended_dict)) + ' records')

        """S3 JSON FROM APPENDED DICT"""
        appended_json = appended_dict_stg.replace('[{', '{').replace(
            ']}', '}').replace('},', '}\n')
        # s3_resource.Object(bucket,key_json).put(Body=appended_json)
        s3_resource.Object(bucket, key_json).put(Body=appended_json)
        print('successfully overwrote main JSON file which now has ' +
              str(len(appended_dict)) + ' records')

        """ATHENA CREATE DROP AND CREATE MAIN TABLE"""
        columns_string = str(temp_df.columns.values).replace(
            "['", "`").replace(" '", " `").replace("']", '` string').replace(
                "' ", "` string, ").replace("'\n", "` string, ").replace(
                    "`date_UTC` string", "`date_UTC` timestamp").replace(
                        "`create_ts` string", "`create_ts` timestamp")
        athena_drop_main()
        time.sleep(10)
        athena_create_main(columns_string)

        """ATHENA DROP AND CREATE TEMP TABLE"""
        columns_string = str(temp_df.columns.values).replace(
            "['", "`").replace(" '", " `").replace("']", '` string').replace(
                "' ", "` string, ").replace("'\n", "` string, ").replace(
                    "`date_UTC` string", "`date_UTC` timestamp").replace(
                        "`create_ts` string", "`create_ts` timestamp")
        athena_drop_temp()
        time.sleep(10)
        athena_create_temp(columns_string)
    except s3_client.exceptions.NoSuchKey:
        print('THE S3 BUCKET SOMEHOW GOT DELETED...')
def genius(artist, title, return_url=False):
    line = f"{artist}-{title}"
    line = re.sub("[,._@!#%^*+:;'()]", "", line)
    line = line.replace("]", "")
    line = line.replace("[", "")
    line = line.replace("?", "")
    line = line.replace(" ", "-")
    line = line.replace("/", "-")
    line = line.replace("-&-", "-and-")
    line = line.replace("&", "-and-")
    line = unidecode(line)  # already a str in Python 3; no .decode() needed
    line = urllib.parse.quote(line)
    line = f"https://genius.com/{line}-lyrics"
    if return_url:
        return line
    page = requests.get(line)
    html = BeautifulSoup(page.text, 'html.parser')
    result = html.find('div', class_='lyrics')  # .get_text()
    if result is not None:
        lyrics = result.get_text()
        lyrics2 = []
        for line in lyrics.splitlines():
            if line.startswith("["):
                pass
            else:
                lyrics2.append(line)
        lyrics = "\n".join(lyrics2)
        lyrics = lyrics.strip("\n")
        return lyrics
    # New layout type
    else:
        results = html.findAll(
            "div", {"class": lambda l: l and "Lyrics__Container" in l})
        lyrics = "".join([r.get_text("\n") for r in results])
        # Strip bracketed section markers like [Chorus] by tracking bracket depth
        level = 0
        new = ""
        for cha in lyrics:
            if level <= 0:
                new += cha
            if cha == "[":
                level += 1
            if cha == "]":
                level -= 1
        lyrics = new
        lines = lyrics.splitlines()
        new_lines = []
        for line in lines:
            if "[" in line:
                line = line.split("[", 1)[0]
                if line:
                    line += "\n"
            new_lines.append(line.lstrip().rstrip(" ") + "\n")
        lyrics = "".join(new_lines)
        lyrics = lyrics.replace("(\n", "(")
        lyrics = lyrics.replace("\n)", ")")
        lyrics = lyrics.lstrip("\n")
        lyrics = lyrics.lstrip()
        return lyrics
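
# Usage sketch for the URL-building path (illustrative artist/title; the
# scraping path needs network access):
print(genius("Beyoncé", "Halo", return_url=True))
# -> https://genius.com/Beyonce-Halo-lyrics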
def parse_xword(self, xword_data):
    puzzle = puz.Puzzle()
    metadata = xword_data.get('puzzle_meta')
    puzzle.author = metadata.get('author').strip()
    puzzle.copyright = metadata.get('copyright').strip()
    puzzle.height = metadata.get('height')
    puzzle.width = metadata.get('width')
    if metadata.get('notes'):
        puzzle.notes = metadata.get('notes')[0]['txt'].strip()
    date_string = metadata.get('printDate')
    self.date = datetime.datetime.strptime(date_string, '%Y-%m-%d')
    puzzle.title = metadata.get('title') or self.date.strftime(
        '%A, %B %d, %Y')
    puzzle_data = xword_data['puzzle_data']
    solution = ''
    fill = ''
    markup = b''
    rebus_board = []
    rebus_index = 0
    rebus_table = ''
    for idx, square in enumerate(puzzle_data['answers']):
        if not square:
            solution += '.'
            fill += '.'
            rebus_board.append(0)
        elif len(square) == 1:
            solution += square
            fill += '-'
            rebus_board.append(0)
        else:
            solution += square[0][0]
            fill += '-'
            rebus_board.append(rebus_index + 1)
            rebus_table += '{:2d}:{};'.format(rebus_index, square[0])
            rebus_index += 1
        markup += (b'\x80' if puzzle_data['layout'][idx] == 3 else b'\x00')
    puzzle.solution = solution
    puzzle.fill = fill
    clue_list = puzzle_data['clues']['A'] + puzzle_data['clues']['D']
    clue_list.sort(key=lambda c: c['clueNum'])
    puzzle.clues = [unidecode(c['value']).strip() for c in clue_list]
    if b'\x80' in markup:
        puzzle.extensions[b'GEXT'] = markup
        puzzle._extensions_order.append(b'GEXT')
        puzzle.markup()
    if any(rebus_board):
        puzzle.extensions[b'GRBS'] = bytes(rebus_board)
        puzzle.extensions[b'RTBL'] = rebus_table.encode(puz.ENCODING)
        puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
        puzzle.rebus()
    return puzzle
def scrape_data(start_date, from_place, to_place, city_name):
    # from_place
    to_input = driver.find_elements_by_class_name('LJTSM3-p-a')[0]
    to_input.click()
    actions = ActionChains(driver)
    a = from_place
    actions.send_keys(a)
    actions.send_keys(Keys.ENTER)
    actions.perform()
    time.sleep(1.5)
    # to_place
    to_input = driver.find_elements_by_class_name('LJTSM3-p-a')[1]
    to_input.click()
    actions = ActionChains(driver)
    b = to_place
    actions.send_keys(b)
    actions.send_keys(Keys.ENTER)
    actions.perform()
    time.sleep(1.5)
    # start_date
    url_changed_date = driver.current_url[:-10] + start_date
    driver.get(url_changed_date)
    time.sleep(1.5)
    # collect all output city names and store them into a list:
    results = driver.find_elements_by_class_name('LJTSM3-v-c')
    # print(results[0].text)  # Portland
    citi_list_ori = []
    for i in range(len(results)):
        one_city_name = results[i].text.split(',')[0]
        citi_list_ori.append(one_city_name)
    citi_list = []
    for city in citi_list_ori:
        city = unidecode(city)
        citi_list.append(city)
    # get particular city:
    for c in range(len(citi_list)):
        if city_name != citi_list[c]:
            continue
        else:
            # get Date & Price for that particular city:
            results = driver.find_elements_by_class_name('LJTSM3-v-d')
            time.sleep(0.2)
            test = results[c]
            # bars contain dates and price
            bars = test.find_elements_by_class_name('LJTSM3-w-x')
            time.sleep(2.0)
            data_date_ori = []
            data_date = []
            data_price = []
            for bar in bars:
                ActionChains(driver).move_to_element(bar).perform()
                time.sleep(0.05)
                data_date_ori.append(
                    test.find_element_by_class_name(
                        'LJTSM3-w-k').find_elements_by_tag_name('div')[1].text)
            # print(len(data_date_ori))
            for i in range(len(data_date_ori)):
                start_date = str(data_date_ori[i]).split('- ')[0]
                data_date.append(start_date)
                time.sleep(0.05)
            # print(data_date, len(data_date))
            for bar in bars:
                ActionChains(driver).move_to_element(bar).perform()
                time.sleep(0.05)
                data_price.append(
                    test.find_element_by_class_name(
                        'LJTSM3-w-k').find_elements_by_tag_name('div')[0].text)
                time.sleep(0.05)
            # print(data_price, len(data_price))
            scrape_data_date_price = pd.DataFrame({
                'Date_Start': data_date,
                'Price_of_TimeRange': data_price
            })
            return scrape_data_date_price