def main(**kwargs):
    """Pick a random image from the configured folder(s) and build its tweet text.

    Reads kwargs keys: 'folder' (str or list), 'after type', 'move folder',
    'used images', 'bot name', 'reset images', 'message', 'saucenao api'.
    Returns a (message, image_path) tuple, or (False, False) when no image
    could be chosen.
    """
    file_types = ('.jpg', '.jpeg', '.png', '.gif', '.webm')
    # Collect candidate filenames from a single folder or a list of folders.
    if isinstance(kwargs.get('folder'), list):
        files = []
        for folder in kwargs.get('folder'):
            files += [file for file in os.listdir(folder)
                      if file.endswith(file_types)]
    else:
        files = [file for file in os.listdir(
            kwargs.get('folder')) if file.endswith(file_types)]
    used_images = []
    a_t = kwargs.get('after type')
    if a_t == "move to folder" or a_t == "1":
        move_to = kwargs.get('move folder')
    else:
        # Track used images in a text file (configured path or a default
        # per-bot file next to the working directory).
        try:
            txt_name = kwargs.get('used images')
            used_images = open(
                kwargs.get('used images'), 'r').read().splitlines()
        except:
            txt_name = os.path.join(os.getcwd(), "Used folder {0}.txt".format(
                kwargs['bot name']))
            if not os.path.exists(txt_name):
                print("Didn't find any used links! Creating a TXT!")
                print("Set it to:\n{0}".format(txt_name))
                used_images = []
            else:
                used_images = open(txt_name, 'r').read().splitlines()
    # When fewer than 4 unused images remain, recycle some so the random
    # pick below cannot starve.
    if utils.is_bool(kwargs.get('reset images')) and len(files) < 4 or \
            utils.is_bool(kwargs.get('reset images')) and (
                (len(files) - len(used_images) < 4)):
        if a_t == "text file" or a_t == "2" or a_t is None:
            used_images = used_images[:-4]
        else:
            reset_files(move_to, kwargs.get('folder'))
    try:
        image = random.choice(files)
    except:
        # Empty candidate list.
        return False, False
    # Fail safe so we don't get stuck in a inf loop
    break_count = 0
    if a_t == "text file" or a_t == "2" or a_t is None:
        write_list = True
        while image in used_images or image is None:
            image = random.choice(files)
            break_count += 1
            if break_count == 50 or not image:
                write_list = False
                break
        if write_list:
            used_images.append(image)
            with open(txt_name, 'w+') as f:
                f.write("\n".join(used_images))
    else:
        from threading import Thread
        Thread(name="Move Image", target=move_image, args=(
            os.path.join(kwargs.get('folder'), image), move_to)).start()
    # BUGFIX: sn_url was only assigned inside the "sn" branch below, yet it
    # is read unconditionally when building `rep` — any message without an
    # "sn" placeholder raised NameError.  Initialise it up front.
    sn_url = None
    sn_kwgs = {}
    if "sn" in kwargs.get('message'):
        # NOTE(review): `image` is a filename string here, so image[0] passes
        # only its first character to saucenao — looks wrong (compare
        # get_reddit, where image is a [url, title, link] list); confirm.
        sn_url, sn_kwgs = utils.saucenao(fname=image[0],
                                         api_key=kwargs.get('saucenao api'),
                                         metainfo=True)
    re_dict = {'(questionmark)': '?', '(star)': '*', '_': ' '}
    # NOTE(review): the '(unknown)' key below looks like extraction garbling —
    # it was presumably '{filename}' originally; kept as-is, confirm upstream.
    rep = {'{filename clean}': utils.replace_all(
               os.path.splitext(image)[0], re_dict),
           '(unknown)': os.path.splitext(os.path.basename(image))[0],
           '{index}': files.index(image),
           '{hash}': hashlib.md5(
               open(os.path.join(
                   kwargs.get('folder'), image), 'rb').read()).hexdigest(),
           '{sn title}': sn_kwgs.get('title'),
           '{sn illust id}': sn_kwgs.get('illust id'),
           '{sn illust url}': sn_url,
           '{sn artist}': sn_kwgs.get('artist'),
           '{sn artist id}': sn_kwgs.get('artist id'),
           '{sn artist url}': sn_kwgs.get('artist url')}
    message = utils.replace_all(kwargs.get('message'), rep)
    image = os.path.join(kwargs.get('folder'), image)
    return(message, image)
def create_presenter(dic_list):
    """Generate Presenter, IPresenter and IView Java files for each API.

    :param dic_list: list of dicts; each needs at least an 'ApiName' key,
        used both to fill the templates (via ``replace_all``) and to name
        the generated files.
    """
    for phd in dic_list:
        # 1. Read the Presenter template and generate the Presenter impl.
        # BUGFIX: the original never closed the template handles and never
        # closed the last output file (fv); `with` closes them even on error.
        with open(template_pres) as f_pres:
            cont_pres = f_pres.read()
        out_path_pres = '%s/impl/%sPresenter.java' % (path_pres,
                                                      phd['ApiName'])
        with open(out_path_pres, 'w+') as fp:
            fp.write(replace_all(cont_pres, phd))
        # 2. Read the IPresenter interface template and generate IPresenter.
        with open(template_ipres) as f_ipres:
            cont_ipres = f_ipres.read()
        out_path_ipres = '%s/I%sPresenter.java' % (path_pres, phd['ApiName'])
        with open(out_path_ipres, 'w+') as fip:
            fip.write(replace_all(cont_ipres, phd))
        # 3. Read the IView template and generate IView.
        with open(template_view) as f_iview:
            cont_iview = f_iview.read()
        out_path_iview = '%s/I%sView.java' % (path_view, phd['ApiName'])
        with open(out_path_iview, 'w+') as fv:
            fv.write(replace_all(cont_iview, phd))
def create_model(dic_list):
    """Generate IModel/Model Java files and register KEY/URL constants.

    :param dic_list: list of dicts with 'ApiName', 'API_NAME', 'apiName',
        'method' and optionally 'path' keys driving template substitution.
    """
    for phd in dic_list:
        print('--->\t' + phd['API_NAME'])
        # IModel interface (with-blocks fix the original's leaked handles).
        with open(template_int) as ftem:
            content = ftem.read()
        created_file_name = 'I%sModel.java' % (phd['ApiName'])
        with open(path_model + created_file_name, 'w+') as created_file:
            created_file.write(replace_all(content, phd))
        # Model implementation: GET requests use a dedicated template.
        if phd['method'].lower() == 'get':
            template = template_impl_get
        else:
            template = template_impl
        with open(template) as fimp:
            imp_content = fimp.read()
        create_impl_name = "%sModel.java" % (phd['ApiName'])
        with open(path_model + '/impl/' + create_impl_name,
                  'w+') as create_impl:
            create_impl.write(replace_all(imp_content, phd))
        # Declare the KEY and URL in HttpConstants: drop the closing '}',
        # append the new constants, and re-add the brace at the end.
        with open(path_const, 'r+') as fconstant:
            constant_content = fconstant.read()
            constant_content = constant_content.strip().strip('}')
            if 'URL_%s ' % (phd['API_NAME']) not in constant_content:
                # BUGFIX: truncate() with no argument cuts at the CURRENT
                # position; the original called it at EOF (a no-op) before
                # seeking.  Seek first, then truncate, so no stale bytes can
                # survive past the rewritten content.
                fconstant.seek(0)
                fconstant.truncate()
                fconstant.write(constant_content)
                fconstant.write('\n\t// v1.2 auto-generate\n')
                add_path = ''
                if 'path' in phd and phd['path'] != '':
                    path = phd['path']
                    add_path = ' + %s' % (path.upper())
                    if constant_path % (path.upper(),
                                        path) not in constant_content:
                        fconstant.write(constant_path % (path.upper(), path))
                fconstant.write(constant_key % (phd['API_NAME'],
                                                phd['apiName']))
                fconstant.write(constant_url % (phd['API_NAME'], add_path,
                                                phd['API_NAME']))
                fconstant.write('\n}')
            else:
                print('---> skip add %s in HttpConstant.java'
                      % (phd['API_NAME']))
def shorten(filename):
    """Return *filename* trimmed to a manageable length.

    On Windows the parenthesised parts are stripped and the project's
    short-name substitutions applied; elsewhere the name is returned as-is.
    """
    # Path-length limits only bite on Windows, so shorten there only.
    if sys.platform.startswith('win'):
        stripped = REMOVE_PARENTHESES.sub('', filename)
        return utils.replace_all(stripped, config.SHORTNAME_MAP)
    return filename
def triangle_zero_replace(triangle, replacement):
    """Returns a triangle with zeros replaced by the given replacement factor.

    >>> triangle_zero_replace([[1, 0, 1, 1], [1, 0, 1], [1, 0], [1]], -1)
    [[1, -1, 1, 1], [1, -1, 1], [1, -1], [1]]
    """
    replaced = []
    for current_row in triangle:
        replaced.append(utils.replace_all(current_row, replacement))
    return replaced
def start_crawl(cnt=28, write_file=False, show_output=True):
    """Crawl the PTWC past-30-days table and report up to *cnt* events.

    :param cnt: number of rows to process, capped at 28.
    :param write_file: when True, also dump the collected dicts as JSON.
    :param show_output: when True, print each event dict.
    """
    if cnt > 28:
        cnt = 28
    # BUGFIX: the pattern contained '\w' inside a NON-raw string — an invalid
    # escape sequence (DeprecationWarning; SyntaxWarning on modern Pythons).
    # Made raw and hoisted out of the loop since it is invariant.
    evaluation_re = r'EVALUATION(\r\n|\r|\n){2}([ \w.]+(\r\n|\r|\n))+(\r\n|\r|\n)'
    try:
        if write_file:
            dict_list = []
        landing_page = utils.crawl_page(URL)
        data_rows = landing_page.findAll('tr', {"class": ["gr", "gr_even"]})
        print('PTWC (Pacific Tsunami Warning Center) (Past 30 days)')
        print('URL:', URL)
        for idx, row in enumerate(data_rows):
            if idx >= cnt:
                break
            datum_dict = {
                "time": row.findAll('td')[0].text,
                "region": row.findAll('td')[1].text,
                "type": row.findAll('td')[2].text,
                "details_link": URL + row.findAll('td')[4].findAll(
                    'a')[1]['href']
            }
            details_page = utils.crawl_page(
                datum_dict['details_link']).find('body').text
            evaluation_match = re.search(evaluation_re, details_page)
            if evaluation_match:
                # Strip the header and line-break/tab noise from the match.
                replace_dict = {"EVALUATION": '', "\r": '', "\n": '',
                                "\t": ''}
                evaluation_match = utils.replace_all(
                    evaluation_match.group(0), replace_dict)
                datum_dict['evaluation'] = evaluation_match
            else:
                print('NO EVALUATION FOUND')
            if show_output:
                utils.print_dict(datum_dict)
            if write_file:
                dict_list.append(datum_dict)
        if write_file:
            utils.write_json_file(WEBSITE, dict_list)
    except Exception as e:
        # Deliberate best-effort boundary: report and swallow.
        print('err:', str(e))
def get_embed_from_deal(deal: Deal) -> discord.Embed:
    """Build a Discord embed advertising *deal*, linking Steam or GOG."""
    if deal.store_id == '1':
        # Steam deals link straight to the store page by app id.
        link = f'https://store.steampowered.com/app/{deal.steam_app_id}'
    else:
        # GOG urls are derived from the title, normalised to snake_case.
        title_subs = {' - ': ' ', "'": '', '.': '', ':': '', ' ': '_'}
        slug = replace_all(deal.title, title_subs).lower()
        link = f'https://www.gog.com/game/{slug}'
    description = (
        f"*Sale price:* **{deal.sale_price}$**\n"
        f"*Normal price:* **{deal.normal_price}$**\n"
        f"*You save*: **{deal.saved_amount()}$ ({deal.saved_percentage}% off)**\n\n"
        f"*Steam reviews:* **{deal.steam_reviews_count}** "
        f"*({deal.steam_reviews_percent}% positive)*\n"
        f"*Link:* {link}/")
    embed = discord.Embed(title=deal.title,
                          description=description,
                          colour=colour_picker(deal.saved_percentage))
    embed.set_image(url=deal.thumbnail_url)
    return embed
def identify_by_type_and_country_code(value: str, offset: int,
                                      types: List[str],
                                      max_attempts: int) -> Optional[str]:
    """Locate a document-type prefix followed by a valid 3-letter country
    code near *offset* and return the remainder of *value* from that match.

    :param value: full text to scan.
    :param offset: index the search works backwards from.
    :param types: candidate document-type prefixes.
    :param max_attempts: how many start positions before *offset* to try.
    :return: ``value[match_start:]`` on success, otherwise ``None``.
    """
    # Idiom fix: the original threaded an `identified` flag through four
    # nested loops purely to simulate a multi-level break; returning directly
    # at the match point is equivalent and much flatter.
    for attempt in range(max_attempts):
        index: int = offset - (max_attempts - attempt)
        if index < 0:
            # First candidate underflows; fall back to a closer start index.
            index = offset - attempt
        if index < 0:
            break
        subset: str = value[index:]
        for i in range(len(subset)):
            # Normalise look-alike characters before prefix matching.
            chars: str = replace_all(subset[i:], transliteration_substitutions)
            for _type in types:
                if not chars.startswith(_type):
                    continue
                type_length = len(_type)
                country_length = 3
                country_code = chars[type_length:type_length + country_length]
                if is_code(country_code):
                    return value[max(index + i, 0):]
    return None
def get_embed_from_deal(deal: Deal) -> discord.Embed:
    """Convert a Deal dataclass instance into a discord.py Embed.

    :param deal: Deal dataclass object describing the discounted game.
    :return: discord.Embed ready to be posted.
    """
    is_steam_deal = deal.store_id == '1'
    if is_steam_deal:
        deal_url = f'https://store.steampowered.com/app/{deal.steam_app_id}'
    else:
        # GOG slugs come from the title with punctuation removed and
        # spaces collapsed to underscores.
        replace_dict = {' - ': ' ', "'": '', '.': '', ':': '', ' ': '_'}
        formatted_title = replace_all(deal.title, replace_dict).lower()
        deal_url = f'https://www.gog.com/game/{formatted_title}'
    embed = discord.Embed(
        title=deal.title,
        description=(
            f"*Sale price:* **{deal.sale_price}$**\n"
            f"*Normal price:* **{deal.normal_price}$**\n"
            f"*You save*: **{deal.saved_amount()}$ ({deal.saved_percentage}% off)**\n\n"
            f"*Steam reviews:* **{deal.steam_reviews_count}** "
            f"*({deal.steam_reviews_percent}% positive)*\n"
            f"*Link:* {deal_url}/"),
        colour=colour_picker(deal.saved_percentage))
    embed.set_image(url=deal.thumbnail_url)
    return embed
def shorten(filename):
    """Shorten *filename* by dropping parenthesised parts and applying the
    configured short-name substitutions."""
    without_parens = REMOVE_PARENTHESES.sub('', filename)
    return utils.replace_all(without_parens, config.SHORTNAME_MAP)
def get_image_online(**kwargs):
    # Pick a random, not-yet-used image post from chan.sankakucomplex.com,
    # optionally logging in and saving the image locally, then build the
    # tweet message from the post's tags.
    # Returns (message, local_image_path) on success, (False, False) when
    # no suitable post could be found.
    #
    # Resolve the "already used" link list (one URL per line).
    if kwargs.get('used images'):
        txt_name = kwargs.get('used images')
        used_links = open(txt_name, 'r').read().splitlines()
    else:
        txt_name = os.path.join(os.getcwd(), "Used sankaku {0}.txt".format(
            kwargs['bot name']))
        try:
            used_links = open(txt_name, 'r').read().splitlines()
        except:
            if not os.path.exists(txt_name):
                print("Didn't find any used links! Creating a TXT!")
                print("Set it to:\n{0}".format(txt_name))
                used_links = []
            else:
                used_links = open(txt_name, 'r').read().splitlines()
    # Highest search-result page to randomise over (default 50).
    if kwargs.get('highest page'):
        high_page = int(kwargs.get('highest page'))
    else:
        high_page = 50
    tried_pages = [high_page]
    cookie_file = None
    try_count = 0
    low_page = 0
    page = 0
    x = None            # lowest page number tried so far (upper bound cap)
    no_images = False
    url_start = "https://chan.sankakucomplex.com"
    url_search = "https://chan.sankakucomplex.com/?tags="
    if utils.is_bool(kwargs.get('login')):
        # Log in once and persist the session cookies next to the script.
        # NOTE(review): the "******" values look like scrubbed credentials /
        # URLs and form field names — confirm against the original config.
        cookie_file = "../sankakucomplex.txt"
        url_login = "******"
        form_num = 0
        form_user = "******"
        form_password = "******"
        username = kwargs.get('username')
        password = kwargs.get('password')
        if not os.path.exists(cookie_file):
            browser, s = utils.scrape_site(url_login, cookie_file, True)
            form = browser.get_form(form_num)
            form[form_user].value = username
            form[form_password].value = password
            browser.submit_form(form)
            s.cookies.save()
    # Where downloaded images land; cwd when images are not being kept.
    if utils.is_bool(kwargs.get('save images')):
        if kwargs.get('path'):
            path = kwargs.get('path')
        else:
            path = os.path.abspath(os.path.join(os.getcwd(), "images"))
        if not os.path.exists(path):
            os.makedirs(path)
    else:
        path = os.path.abspath(os.path.join(os.getcwd()))
    # Search tags: list or ", "-separated string, joined with '+' for the URL.
    if kwargs.get('tags'):
        if isinstance(kwargs.get('tags'), list):
            tags = '+'.join(kwargs.get('tags'))
        else:
            tags = '+'.join(kwargs.get('tags').split(', '))
    else:
        tags = ""
    # Tags that disqualify a post entirely.
    if kwargs.get('ignore tags'):
        if isinstance(kwargs.get('ignore tags'), list):
            ignore_tags = kwargs.get('ignore tags')
        else:
            ignore_tags = kwargs.get('ignore tags').split(', ')
    else:
        ignore_tags = []
    if utils.is_bool(kwargs.get('ignore cosplay')):
        ignore_cosplay = utils.is_bool(kwargs.get('ignore cosplay'))
    else:
        ignore_cosplay = False
    if utils.is_bool(kwargs.get('accept webm')):
        accept_webm = utils.is_bool(kwargs.get('accept webm'))
    else:
        accept_webm = False
    # NOTE(review): this overwrites the earlier tried_pages = [high_page];
    # presumably intentional so page high_page itself stays pickable.
    tried_pages = [high_page + 1]
    # Nested retry loops (innermost to outermost): find a result page with
    # posts -> find a post passing the tag filters -> resolve a usable
    # (non-flash) image URL and download it -> return.
    while True:
        while True:
            while True:
                while True:
                    no_images = False
                    try_count += 1
                    if try_count == 15:
                        # Give up after too many page attempts.
                        return False, False
                    page = str(int(random.randint(low_page, high_page) * 1))
                    # Re-roll until we hit a page not tried yet (0 is always
                    # allowed through).
                    while int(page) in tried_pages:
                        if int(page) == 0:
                            break
                        if not x:
                            x = high_page
                        page = str(int(
                            random.randint(low_page, high_page) * 1))
                        if int(page) > int(x):
                            continue
                    tried_pages.append(int(page))
                    x = min(tried_pages)
                    page_url = "&page=" + str(page)
                    url = "%s%s%s" % (url_search, tags, page_url)
                    browser = utils.scrape_site(url, cookie_file)
                    if browser.find('div', text="No matching posts"):
                        no_images = True
                        time.sleep(1)
                    if not no_images:
                        break
                    elif no_images and int(page) == 0:
                        # Even the first page is empty: no results at all.
                        return False, False
                # Collect all post links on the page.
                good_image_links = []
                image_links = browser.find_all('a')
                for link in image_links:
                    try:
                        link['href']
                    except:
                        continue
                    if "/post/show/" not in link['href']:
                        continue
                    good_image_links.append(link['href'])
                if good_image_links == []:
                    return False, False
                random.shuffle(good_image_links)
                url = "%s%s" % (url_start, random.choice(good_image_links))
                try_count = 0
                # Re-pick until we find an unused post (max 20 tries).
                # NOTE(review): the retry uses "%s/%s" while the first pick
                # uses "%s%s" — hrefs already start with '/', so retries get
                # a doubled slash; looks accidental, confirm before changing.
                while url in used_links:
                    url = "%s/%s" % (
                        url_start, random.choice(good_image_links))
                    try_count = try_count + 1
                    if try_count == 20:
                        break
                used_links.append(url)
                # Make a copy for better use in message
                post_url = url
                browser.open(url)
                if not accept_webm:
                    # Skip video posts when webm is not accepted.
                    if browser.find('video', attrs={'id': 'image'}):
                        continue
                # Bucket the sidebar tags by type for message templating.
                image_tags = []
                char_tags = []
                art_tags = []
                sers_tags = []
                tags_tags = []
                site_tag = browser.find('ul', id="tag-sidebar")
                site_tag = site_tag.find_all('li')
                for taga in site_tag:
                    tag = tag_clean(taga)
                    if taga['class'][0] == "tag-type-artist":
                        art_tags.append(tag.title())
                    elif taga['class'][0] == "tag-type-copyright":
                        sers_tags.append(tag.title())
                    elif taga['class'][0] == "tag-type-character":
                        char_tags.append(tag.title())
                    else:
                        tags_tags.append(tag.title())
                    image_tags.append(tag.lower())
                # Case-insensitive rejection on any ignored tag.
                if any([item in [x.lower() for x in ignore_tags]
                        for item in [x.lower() for x in image_tags]]):
                    continue
                if ignore_cosplay:
                    if any(" (cosplay)" in s for s in image_tags):
                        continue
                # Post accepted.
                break
            image_url = browser.find('img', attrs={'id': 'image'})
            if not image_url:
                image_url = browser.find('video', attrs={'id': 'image'})
            try:
                url = urllib.parse.urljoin("https:", image_url['src'])
            except:
                # Flash File
                continue
            filename = ""
            if not utils.is_bool(kwargs.get('message')):
                message = ""
            sn_kwgs = {}
            sn_url, sn_kwgs = utils.saucenao(url, kwargs['saucenao api'],
                                             True)
            # Template substitutions available in 'filename' and 'message'.
            re_dict = {'{#artist}': (
                           '#' if art_tags else '') + ' #'.join(
                           [x.replace(" ", "_") for x in art_tags]),
                       '{#character}': (
                           '#' if char_tags else '') + ' #'.join(
                           [x.replace(" ", "_") for x in char_tags]),
                       '{#series}': (
                           '#' if sers_tags else '') + ' #'.join(
                           [x.replace(" ", "_") for x in sers_tags]),
                       '{#tags}': (
                           '#' if tags_tags else '') + ' #'.join(
                           [x.replace(" ", "_") for x in tags_tags]),
                       '{artist}': ', '.join(art_tags),
                       '{character}': ', '.join(char_tags),
                       '{series}': ', '.join(sers_tags),
                       '{tags}': ', '.join(tags_tags),
                       '{url}': post_url,
                       '{title}': sn_kwgs.get('title'),
                       '{sn title}': sn_kwgs.get('title'),
                       '{sn illust id}': sn_kwgs.get('illust id'),
                       '{sn illust url}': sn_url,
                       '{sn artist}': sn_kwgs.get('artist'),
                       '{sn artist id}': sn_kwgs.get('artist id'),
                       '{sn artist url}': sn_kwgs.get('artist url')}
            if kwargs.get('filename'):
                filename = utils.replace_all(kwargs.get('filename'), re_dict)
                filename = utils.safe_msg(filename)
            if kwargs.get('message'):
                message = utils.replace_all(kwargs.get('message'), re_dict)
                message = utils.safe_msg(message)
            # Persist the updated used-links list before downloading.
            with open(txt_name, 'w+') as f:
                f.write("\n".join(used_links))
            tweet_image = utils.download_image(url, path, filename, **kwargs)
            if tweet_image:
                break
        # Download succeeded: optionally schedule cleanup, then return.
        if not utils.is_bool(kwargs.get('save images')):
            from threading import Thread
            Thread(name="Delete Image", target=delete_image,
                   args=(tweet_image, )).start()
        return message, tweet_image
def test_replace_all(self):
    """replace_all must swap every zero in the list for the replacement."""
    original = [0, 3, 2, 0]
    self.assertEqual(utils.replace_all(original, -1), [-1, 3, 2, -1])
    self.assertEqual(utils.replace_all(original, "a"), ["a", 3, 2, "a"])
# Load abstracts/titles/EIDs from the Excel file given on the command line,
# expand acronyms, clean the text, and build bigram/trigram phrase models.
filename = sys.argv[-1]
df = pd.read_excel(filename)
# BUGFIX: DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
# 1.0; to_numpy() is the drop-in replacement with identical .tolist() output.
abst = list(
    itertools.chain.from_iterable(df[['Abstract']].to_numpy().tolist()))
titles = list(
    itertools.chain.from_iterable(df[['Title']].to_numpy().tolist()))
eids = list(itertools.chain.from_iterable(df[['EID']].to_numpy().tolist()))
# run acronyms and other preprocessing functions on data
acronyms = list(map(lambda x: utils.findAcronyms(x), abst))
acronyms = list(itertools.chain.from_iterable(acronyms))
# Map " ACRO " -> " expansion " so whole-word replacement is safe.
acroDict = {' ' + a[0] + ' ': ' ' + a[1] for a in acronyms}
abst = [strip_punctuation(row) for row in abst]
absCl = list(map(lambda x: utils.replace_all(x, acroDict), abst))
absCl = list(map(lambda x: utils.preprocess(x), absCl))
absCl = list(map(lambda x: utils.remove_non_ascii(x), absCl))
stopwords = nltk.corpus.stopwords.words('english')
# Tokenise, lowercase, and drop stopwords per abstract.
texts = [[word for word in abstract.lower().split()
          if word not in stopwords] for abstract in absCl]
# Learn bigram then trigram phrase models and re-tokenise with them.
phrases = Phrases(texts)
bigram = Phraser(phrases)
trigram = Phrases(bigram[texts])
trigram = Phraser(trigram)
texts = [trigram[bigram[text]] for text in texts]
y = []
def backup_database():
    """Copy the main database to a timestamp-named .db file in the cwd."""
    # Strip '-', ' ', '.' and ':' from the timestamp to get a flat name.
    timestamp = replace_all(str(datetime.now()), '- .:', '')
    back_up_name = "{}.db".format(timestamp)
    print(back_up_name)
    copyfile(MAIN_DATABASE_PATH, back_up_name)
def get_reddit(**kwargs):
    """Fetch a random/next image post from the configured subreddits.

    Reads kwargs keys: 'used images', 'bot name', 'save images', 'path',
    'subreddits', 'random subreddit', 'random link', 'message', 'filename',
    'saucenao api'.  Returns (message, image_path), or (False, False) when
    no subreddits are configured.
    """
    # Resolve the used-links text file (first line stores the last-used sub).
    if kwargs.get('used images'):
        txt_name = kwargs.get('used images')
        used_links = open(txt_name, 'r').read().splitlines()
    else:
        txt_name = os.path.join(os.getcwd(), "Used reddit {0}.txt".format(
            kwargs['bot name']))
        try:
            used_links = open(txt_name, 'r').read().splitlines()
        except:
            if not os.path.exists(txt_name):
                print("Didn't find any used links! Creating a TXT!")
                print("Set it to:\n{0}".format(txt_name))
                used_links = []
            else:
                used_links = open(txt_name, 'r').read().splitlines()
    try:
        sub = used_links[0]
        used_links = used_links[1:]
    except:
        # Probably doesn't exist (i hope only that)
        pass
    # Where downloaded images land; cwd when images are not being kept.
    if kwargs.get('save images'):
        if kwargs.get('path'):
            path = kwargs.get('path')
        else:
            path = os.path.abspath(os.path.join(os.getcwd(), "images"))
        if not os.path.exists(path):
            os.makedirs(path)
    else:
        path = os.path.abspath(os.path.join(os.getcwd()))
    start_url = "https://www.reddit.com/r/"
    subreddits = kwargs.get('subreddits')
    is_random = kwargs.get('random subreddit')
    is_random_link = kwargs.get('random link')
    if subreddits is None:
        return False, False
    if isinstance(subreddits, str):
        subreddits = subreddits.split(", ")
    if utils.is_bool(is_random):
        import random
        sub = random.choice(subreddits)
    else:
        # Get last used sub and + 1
        try:
            # BUGFIX: the original ended with ".splitlines" (no call), which
            # bound the method object instead of the lines, so the
            # subreddits.index() lookup below ALWAYS failed into the except.
            # The first line of the used file holds the last-used sub.
            sub = open(os.path.join(
                os.getcwd(), "Used reddit {0}.txt".format(
                    kwargs['bot name'])), 'r').read().splitlines()[0]
            sub = subreddits[(subreddits.index(sub) + 1)]
        except:
            # Doesn't exsist / end of list
            sub = subreddits[0]
    url = start_url + sub + "/.rss"
    soup = utils.scrape_site(url, is_rss=True)
    # Collect [image_url, post_title, post_link] triples from the feed.
    pic_imgs = []
    for a in soup.find_all('item'):
        img_string = a.find('description').string
        img_title = a.find('title').string
        img_link = a.find('link').string
        img_string = img_string[:img_string.index("[link]")]
        img_string = BeautifulSoup(img_string, 'html5lib').find_all('a')
        for item in img_string:
            if "reddit.com" not in item['href'] and "http" in item['href']:
                pic_imgs.append([item['href'], img_title, img_link])
    if utils.is_bool(is_random_link):
        import random
        image = random.choice(pic_imgs)
    else:
        image = pic_imgs[0]
    # Re-pick until an unused link is found (bounded at 50 random tries).
    # NOTE(review): the sequential branch can raise IndexError when every
    # link is used — pre-existing behaviour, left unchanged.
    safe_break = 0
    count = 0
    while image[0] in used_links:
        if utils.is_bool(is_random_link):
            image = random.choice(pic_imgs)
        else:
            image = pic_imgs[count]
            if image[0] in used_links:
                count += 1
                continue
            break
        safe_break += 1
        if safe_break == 50:
            break
    used_links.append(image[0])
    imgTypes = {"jpg": "image/jpeg", "jpeg": "image/jpeg",
                "png": "image/png", "gif": "image/gif",
                "webm": "video/webm"}
    filepath = urlparse(image[0]).path
    ext = os.path.splitext(filepath)[1].lower()
    if not ext[ext.rfind(".") + 1:] in imgTypes:
        if "imgur" in image[0]:
            # Just make it .png it still returns correct image
            image[0] = "http://i.imgur.com/" + image[0].rsplit(
                '/', 1)[1] + ".png"
            ext = ".png"
    # BUGFIX: sn_url was only assigned inside the "sn" branch but is read
    # unconditionally in re_dict below — initialise it up front.
    sn_url = None
    sn_kwgs = {}
    # Drop "(x-post ...)"/"(via ...)" annotations from the title.
    if "(x-post" in image[1].lower() or "(via" in image[1].lower():
        image[1] = re.sub(r'\([^)]*\)', '', image[1])
    # BUGFIX: guard against a missing 'message' kwarg (the original raised
    # TypeError on `"sn" in None`; message is optional further down).
    if "sn" in (kwargs.get('message') or ''):
        sn_url, sn_kwgs = utils.saucenao(fname=image[0],
                                         api_key=kwargs.get('saucenao api'),
                                         metainfo=True)
    re_dict = {'{url}': image[2],
               '{title}': image[1],
               '{sn title}': sn_kwgs.get('title'),
               '{sn illust id}': sn_kwgs.get('illust id'),
               '{sn illust url}': sn_url,
               '{sn artist}': sn_kwgs.get('artist'),
               '{sn artist id}': sn_kwgs.get('artist id'),
               '{sn artist url}': sn_kwgs.get('artist url')}
    if kwargs.get('filename'):
        filename = utils.replace_all(kwargs.get('filename'), re_dict)
        filename = utils.safe_msg(filename)
    else:
        filename = ""
    if kwargs.get('message'):
        message = utils.replace_all(kwargs.get('message'), re_dict)
        message = utils.safe_msg(message)
    else:
        message = ""
    image = utils.download_image(image[0], path, filename, **kwargs)
    # Persist the last-used sub as line one, then the used links.
    used_links = [sub] + used_links
    with open(txt_name, 'w') as f:
        f.write("\n".join(used_links))
    return message, image