def ver_users():
    """Print a table of the users stored in the registry file.

    The registry is a single text file whose content is a ``', '``-separated
    sequence of alternating fields (even positions are printed in the name
    column, odd positions in the age column). If the file does not exist yet
    it is created empty, so the table is printed without any real entry.
    """
    caminho = '115_Sistema_Registo_Utilizadores.txt'
    try:
        with open(caminho, 'r') as f:
            utilizadores = f.read()
    except FileNotFoundError:
        # Create the empty registry and close the handle right away; the
        # previous code left the handle returned by open() dangling.
        with open(caminho, 'w'):
            pass
        utilizadores = ''

    utilizadores = utilizadores.split(', ')
    # Width reserved for the user number column (derived from the textual
    # length of half the field count, plus one for the trailing colon).
    n_max_user = len(str(len(utilizadores) / 2)) + 1
    lengh = 6 + n_max_user + 25 + 7
    print(c(mf.tit('Utilizadores Registados', lengh)).cyan)

    for n, item in enumerate(utilizadores):
        if n == 0:
            # Column headers are printed once, before the first row.
            print(
                c(f'{"Número":<{7 + n_max_user}}'
                  f'{"Nome":<25}'
                  f'Idade\n').yellow)
        if n % 2 == 0:
            # Even positions start a new row: user number followed by name.
            n_user = n // 2 + 1
            print(c(f'User nº{str(n_user) + ":":<{n_max_user}}'
                    f'{item:<25}').blue,
                  end='')
        else:
            # Odd positions finish the row with the age.
            print(c(f'{item} anos').blue)
def ler_float():
    """Prompt until the user types a valid real number and return it.

    Returns:
        The parsed ``float``, or ``0`` when the user interrupts the prompt
        with Ctrl-C.
    """
    while True:
        try:
            # The prompt itself must be inside the ``try``: Ctrl-C is raised
            # while ``input`` is waiting, not while ``float`` is converting.
            return float(input('Escreva um número real: ').strip())
        except ValueError:
            print(c('Por favor introduza um número válido!\n').red)
        except KeyboardInterrupt:
            print(c('O utilizador não inseriu nenhum dado.\n').red)
            return 0
def ler_int():
    """Prompt until the user types a valid integer and return it.

    Returns:
        The parsed ``int``, or ``0`` when the user interrupts the prompt
        with Ctrl-C.
    """
    while True:
        try:
            # The prompt itself must be inside the ``try``: Ctrl-C is raised
            # while ``input`` is waiting, not while ``int`` is converting.
            return int(input('Escreva um número inteiro: ').strip())
        except ValueError:
            print(c('Por favor insira um número inteiro válido!\n').red)
        except KeyboardInterrupt:
            print(c('O utilizador não inseriu nunhum dado.\n').red)
            return 0
def menu():
    """Show the main menu and return the option chosen by the user.

    Returns:
        Whatever ``mi.inputthis`` returns after restricting the answer to
        ``'1'``, ``'2'``, ``'3'`` or ``'4'``.
    """
    titulo = c(mf.tit('Menu Principal')).cyan
    print(titulo)
    print(c('Opções:').magenta)

    # (key label, description) pairs, rendered in this order.
    entradas = (
        ('1 ->', 'Registar um novo utilizador'),
        ('2 ->', 'Ver Lista de utilizadores registados'),
        ('3 ->', 'Elimnar um utilizador'),
        ('4 ->', 'Terminar o Programa'),
    )
    linhas = [
        f'{c(tecla).yellow} {c(descricao).blue}'
        for tecla, descricao in entradas
    ]
    print(' '.join(linhas) + '\n')

    pergunta = c('Digite o número da operação a executar: ').green
    aviso = c('Escreva só 1, 2, 3 ou 4!').red
    return mi.inputthis(pergunta, ('1', '2', '3', '4'), aviso)
def runScriptDirectly(script: str):
    """Echo ``script`` and run it as a subprocess.

    When the project configuration reports ``use_environment`` as truthy the
    command is executed through ``pipenv run``; otherwise it is executed
    directly. The command line is split on whitespace.

    Raises:
        Exception: if the process finishes with a non-zero return code.
    """
    banner = '\n\t' + c('>').blue + c('>').yellow + f' {script}\n'
    click.echo(banner)

    myp_obj: MYPReader = MYPReader()
    launcher = ['pipenv', 'run'] if myp_obj.get_data("use_environment") else []

    output: subprocess.CompletedProcess = subprocess.run(
        [*launcher, *script.split()])
    if output.returncode == 0:
        return
    raise Exception(f"Process exited with code {output.returncode}")
def collect_testcases(
        files: typing.Iterable[pathlib.Path],
) -> typing.Dict[str, typing.Callable]:
    """Build a mapping of testcase names to functions from the given files.

    Every file is loaded with ``load_module`` and each module-level object
    carrying a ``_tbot_testcase`` attribute is registered under its module
    attribute name. When a name shows up more than once a warning is printed
    and the entry is replaced by a stub that raises ``RuntimeError`` listing
    all files that define it.

    :param files: Iterable of files to load
    :returns: Mapping of testcase names to callables
    """

    def make_duplicate(
            testcase_name: str,
            paths: typing.List[pathlib.Path],
    ) -> typing.Callable:
        # A factory is used so every stub keeps its OWN name and file list.
        # Defining the stub directly inside the loop late-binds both the loop
        # variable and the stub itself, so an older stub would report the
        # name/files of whatever was processed last.
        def duplicate(*args: typing.Any, **kwargs: typing.Any) -> None:
            listing = "-/-"
            known = getattr(duplicate, "_tbot_files")
            if known is not None:
                listing = "\n > ".join(map(str, known))
            raise RuntimeError(
                c(f"The testcase {testcase_name!r} exists multiple times!"
                  ).yellow +
                """ Please tighten your testcase paths or remove/rename them so this conflict can be resolved. The testcase was defined in: > """
                + listing)

        setattr(duplicate, "_tbot_files", paths)
        return duplicate

    testcases: typing.Dict[str, typing.Callable] = {}

    for f in files:
        try:
            module = load_module(f)

            for name, func in module.__dict__.items():
                if not hasattr(func, "_tbot_testcase"):
                    continue

                if name in testcases:
                    print(
                        c("Warning").yellow.bold +
                        f": Duplicate testcase {name!r}",
                        file=sys.stderr,
                    )
                    # Extend the list of defining files of the previous entry
                    # (which may itself already be a stub).
                    tbot_files = getattr(testcases[name],
                                         "_tbot_files").copy()
                    tbot_files.append(f)
                    testcases[name] = make_duplicate(name, tbot_files)
                else:
                    func._tbot_files = [f]
                    testcases[name] = func
        except:  # noqa: E722
            # Deliberately broad: a file that cannot be loaded must not stop
            # collection of the remaining files; report it and go on.
            import textwrap
            import traceback

            trace = textwrap.indent(traceback.format_exc(), " ")
            print(
                c("Warning").yellow.bold + f": Failed to load {f}:\n{trace}",
                file=sys.stderr,
            )

    return testcases
def registar_user():
    """Ask for a new user's name and age and append them to the registry.

    The record is appended to the registry file as ``', <name>, <age>'``.
    """
    print(c(mf.tit('Registar Utilizador')).cyan)
    nome = str(input(c('Nome do novo utilizador: ').green)).strip()
    # The error message and the trailing flag are arguments of
    # ``mi.inputint`` (same call shape as in ``delete_user``); they used to
    # be passed to ``c(...)`` by a misplaced parenthesis.
    idade = str(
        mi.inputint(
            c('Idade do novo utilizador: ').green,
            c('Escreva um número inteiro válido!').red, False))

    # Append mode creates the file when it is missing, so no create-and-retry
    # loop (which leaked an unclosed handle) is needed.
    with open('115_Sistema_Registo_Utilizadores.txt', 'a') as f:
        f.write(f', {nome}, {idade}')
def warning(msg: typing.Union[str, c]) -> EventIO:
    """
    Emit a warning message.

    The text is prefixed with a bold yellow ``Warning`` label and forwarded
    to ``message`` with ``Verbosity.QUIET``.

    :param str msg: The message
    """
    label = c("Warning").yellow.bold
    text = label + ": " + msg
    return message(text, Verbosity.QUIET)
def _prefix(self) -> str:
    """Return the text to put in front of the next printed line.

    The result is ``NESTING`` repetitions of the vertical-bar marker followed
    by either ``self.nest_first`` (only while ``self.first`` is set) or one
    more marker, rendered dark, plus ``self.prefix`` when it is set.
    Calling this clears ``self.first``.
    """
    if self.first:
        lead = self.nest_first
    else:
        lead = u("│ ", "| ")
    self.first = False

    suffix: str = self.prefix or ""
    bars = "".join(itertools.repeat(u("│ ", "| "), NESTING))
    return str(c(bars + lead).dark) + suffix
def duplicate(*args: typing.Any, **kwargs: typing.Any) -> None:
    """Stand-in for a testcase that was defined more than once.

    Always raises ``RuntimeError``; the message lists the files stored in
    this function's ``_tbot_files`` attribute (or ``-/-`` when it is None).
    """
    recorded = getattr(duplicate, "_tbot_files")
    listing = ("-/-" if recorded is None else "\n > ".join(
        map(str, recorded)))
    headline = c(f"The testcase {name!r} exists multiple times!").yellow
    raise RuntimeError(
        headline +
        """ Please tighten your testcase paths or remove/rename them so this conflict can be resolved. The testcase was defined in: > """
        + listing)
def delete_user():
    """Ask for a user number and remove that user from the registry file.

    User ``n`` occupies the two consecutive fields starting at index
    ``2 * n - 2`` of the ``', '``-separated registry. The prompt is repeated
    until a number that maps to a complete record is entered; the file is
    then rewritten without that record. Nothing is written when the registry
    file is missing or holds no complete record.
    """
    print(c(mf.tit('Eliminar Utilizador')).cyan)
    try:
        with open('115_Sistema_Registo_Utilizadores.txt', 'r') as f:
            utilizadores = f.read()
    except FileNotFoundError:
        print('Ainda não há nenhum utilizador registado!')
        return

    # Split once, outside the retry loop: re-splitting after a failed attempt
    # used to call ``.split`` on a list.
    utilizadores = utilizadores.split(', ')
    if len(utilizadores) < 2:
        # No complete record: without this guard no answer could ever be
        # accepted and the prompt would repeat forever.
        print('Ainda não há nenhum utilizador registado!')
        return

    while True:
        num = mi.inputint(
            c('Número do utilizador: ').green,
            c('Escreva um número inteiro válido!').red, False)
        inicio = 2 * num - 2
        # Validate before mutating, so a bad number can neither remove half
        # a record nor (for num <= 0) index from the end of the list.
        if 0 <= inicio and inicio + 1 < len(utilizadores):
            del utilizadores[inicio:inicio + 2]
            break
        print(c('Utilizador inválido!\n').red)

    utilizadores = mf.join_list(utilizadores, ', ')
    with open('115_Sistema_Registo_Utilizadores.txt', 'w') as f:
        f.write(utilizadores)
def _print_lines(self, last: bool = False) -> None:
    """Print the complete lines buffered since ``self.cursor``.

    Every newline-terminated line after the cursor is printed with the
    current prefix and the cursor is advanced past it. With ``last`` set, a
    trailing unterminated remainder is printed as well (the cursor is not
    advanced for it). Nothing is printed when ``self.verbosity`` exceeds
    ``VERBOSITY``.

    :param last: Also print the trailing text that has no newline yet
    """
    pending = self.getvalue()[self.cursor:]

    if self.verbosity > VERBOSITY:
        return

    while True:
        head, newline, rest = pending.partition("\n")
        if not newline:
            break
        print(self._prefix() + c(head))
        # +1 accounts for the newline that terminated this line.
        self.cursor += len(head) + 1
        pending = rest

    self.first = False

    if not last or pending == "":
        return
    print(self._prefix() + pending)
import sys

# Query the status of the requested jobs; the job objects are read from the
# second element of the returned value.
o = Client.getJobStatus(jobs_to_query)
for j in o[1]:
    if j is None:
        continue
    message_template = "%s: %s (%s): %s %s" % (
        j.PandaID, j.jobStatus, j.jobSubStatus, j.jobParameters, j.cmtConfig)
    if quiet:
        # Quiet mode: print only the bare status of the first job and stop.
        message_template = j.jobStatus
        print(message_template)
        # ``os`` has no ``exit`` function; ``sys.exit`` is the regular way to
        # end the script with status 0.
        sys.exit(0)
    message = None
    # Pick a colour according to the job status.
    if j.jobStatus == 'finished':
        message = c(message_template).green
    elif j.jobStatus == 'failed':
        message = c(message_template).red
    elif j.jobStatus in ('running', 'starting', 'transferring'):
        message = c(message_template).blue
    elif j.jobStatus in ('sent', 'holding'):
        message = c(message_template).yellow
    elif j.jobStatus in ('cancelled', 'defined', 'activated'):
        message = c(message_template).cyan
    else:
        # unknown
        message = '%s: job does not exist or status=%s' % (j.PandaID,
                                                           j.jobStatus)
    # NOTE(review): ``message`` is built but not emitted in the visible part
    # of the script - confirm where it is printed.
def process_url_for_validity(ad_name: str, region: str, category: str,
                             subcategory: str,
                             url: url_type) -> url_type or False:
    """
    Try to turn ``url`` into a link the parser can work with.

    The page behind ``url`` is requested up to four times. An attempt is
    considered successful when the page contains the
    ``span.page-title-count-1oJOc`` element; the current ``url`` is then
    returned. Otherwise the failure is classified by the headings found on
    the page and, depending on the attempt number, the region part of the
    link is rebuilt automatically (attempt 1), rebuilt from a region typed by
    the user (attempt 2) or the whole link is typed by the user (attempt 3).

    ad_name: str -> ad name parameter (only shown to the user).
    region: str -> region by which will search ads.
    category: str -> category by which will search ads (only shown to the user).
    subcategory: str -> subcategory by which will search ads (only shown to the user).
    url: url_type -> link to validate.

    @return: url or False -> the accepted link, or False when the page
    reports no results or the fourth attempt also fails. The process is
    terminated with ``exit()`` when the firewall page is detected.
    """
    for try_number in range(1, 4 + 1):
        try:
            test_request = get_response(url)
            bs_html: BeautifulSoup = BeautifulSoup(test_request.content,
                                                   'html.parser')
            # Only the attribute access matters here: when the counter
            # element is missing ``find`` returns None and reading
            # ``.get_text`` raises AttributeError, which selects the
            # recovery path below. The value itself is never used.
            test_get_text = bs_html.find(
                'span', class_='page-title-count-1oJOc').get_text
        except AttributeError:
            if bs_html.find('h2', class_='no-results-title-3kn6E') is not None:
                # The search itself worked but matched nothing.
                print('Ничего не найдено в выбранной области поиска')
                return False
            elif bs_html.find('h2', class_='firewall-title') is None:
                if try_number == 1:
                    # Remember the original region for the prompt of
                    # attempt 3, then retry with only its last word.
                    past_region = region
                    region = region.split()[-1].strip()
                    region_for_url = transliterate.translit(
                        region, reversed=True).replace(' ', '_').strip().replace(
                            "'", '').replace('j', 'y')
                    # Replace the first path segment after position 21 of
                    # the link with the transliterated region.
                    # NOTE(review): 21 presumably skips the host prefix -
                    # confirm against HOST.
                    url = f"{HOST}/{region_for_url}{url[21 + url[21:].find('/'):]}"
                elif try_number == 2:
                    print(
                        f'Введите регион ({region}), что бы он отвечал на вопрос (область чья?) '
                        'или образовывал словосочитание по типу (Московская область).\n'
                        'Примеры:\n'
                        ' Киров -> Кировская\n'
                        ' Москва -> Московская\n')
                    region = input('Вводите: ').strip() + ' область ' + region
                    region_for_url = transliterate.translit(
                        region, reversed=True).replace(' ', '_').strip().replace(
                            "'", '').replace('j', 'y')
                    # NOTE(review): this rebuilds the link from the global
                    # ``URL`` rather than the ``url`` parameter used in
                    # attempt 1 - confirm this is intended.
                    url = f"{HOST}/{region_for_url}{URL[21 + URL[21:].find('/'):]}"
                elif try_number == 3:
                    print(
                        'Введите ссылку вручную. Её можно получить на Авито '
                        'https://www.avito.ru/ в адресной строке, указав в поисковик параметры:\n'
                        f'Ключ: {ad_name}; Регион: {past_region}; Категория: {category}; Подкатегория: {subcategory};\n'
                    )
                    url = input('Вводите: ')
                    # NOTE(review): the region is derived from the global
                    # ``URL``, not from the link that was just typed in -
                    # confirm this is intended.
                    region = transliterate.translit(URL[21:URL.find('/', 21)],
                                                    'ru')
                else:
                    # Fourth failed attempt: give up on these parameters.
                    print(c('По вашим параметрам ничего не найдено.').red)
                    return False
            elif bs_html.find('h2', class_='firewall-title') is not None:
                # Firewall page: further requests are pointless.
                print(
                    c('Ваш IP адрес заблокировал Avito на время. Следуйте указаниям файла help.txt.'
                      ).red)
                exit()
        else:
            return url
def run():
    """Parse every parameter row of the input workbook, one row at a time.

    The user is asked for the path of an xlsx file. Starting at row 2 of the
    sheet ``Лист1``, columns A-D are read as ad name, region, category and
    subcategory. For each row a link is generated, the per-row global state
    is reset, the ads are parsed and the results are written out. Processing
    stops at the first row whose four cells are all empty.

    A row for which no valid link could be generated (global ``URL`` falsy)
    is skipped.
    """
    global list_statistic_about_ad, counter_first_fifty_ads, output_xlsx_file, parsed_ads, URL
    row: int = 2
    column_letters = 'ABCD'

    xlsx_file_path = input(
        'Введите название файла с параметрами или его путь: ')
    workbook = openpyxl.load_workbook(xlsx_file_path)
    workbook_list = workbook['Лист1']

    while True:
        # A row counts as filled when at least one of its A-D cells has a
        # value.
        row_is_fill: bool = any(
            workbook_list[col_letter + str(row)].value is not None
            for col_letter in column_letters)
        if not row_is_fill:
            break

        print('Подождите 40-65 секунд...')
        time.sleep(round(randint(40, 65) + random(), 2))

        # Parameters for link which need parsed
        ad_name = workbook_list[f'A{row}'].value
        region = workbook_list[f'B{row}'].value.lower().strip()
        category = workbook_list[f'C{row}'].value.lower().strip()
        subcategory = workbook_list[f'D{row}'].value.lower().strip()

        # Advance BEFORE the validity check: skipping a bad row without
        # moving on would make the loop retry the very same row forever.
        row += 1

        generate_valid_url_for_parsing(ad_name, region, category, subcategory)
        if not URL:
            continue

        # Reset past data
        list_statistic_about_ad = []
        counter_first_fifty_ads = 50
        parsed_ads = []
        output_xlsx_file = {
            'Ключ': ad_name,
            'Регион': region,
            'Общее количество объявлений': 0,
            'Общее количество просмотров всего': 0,
            'Общее количество просмотров за сегодня': 0,
            'Дата публикации 10-ого объявления (сортировка по дате)': 'Нету',
            '20-ого объявления (сортировка по дате)': 'Нету',
            '50-ого объявления (сортировка по дате)': 'Нету',
            'Средняя цена всех со всех объявлений': 0,
            'Общее количество просмотров первых 50 объявлений (сегодня)': 0,
            'Общее количество просмотров первых 50 объявлений (всего)': 0
        }

        print(f'Парсится ссылка {URL}')
        print(
            f'{"Ключ": ^25} | {"Регион": ^26} | {"Категория": ^29} | {"Подкатегория": ^32}'
        )
        print('_' * 121)
        print(
            f'{ad_name: ^23} | {region: ^26} | {category: ^28} | {subcategory: ^32}'
        )

        set_common_amount_of_ad()
        set_date_of_publication_of_ad()
        time.sleep(3)

        avito_response: Response = get_response(URL)
        avito_page_content: BeautifulSoup = BeautifulSoup(
            avito_response.content, 'html.parser')
        try:
            # The second-to-last pagination item holds the last page number.
            max_pages = int(
                avito_page_content.find_all(
                    'span', class_='pagination-item-1WyVp')[-2].text)
        except IndexError:
            # Fewer than two pagination items: treat it as a single page.
            max_pages = 1

        send_ad_data_to_functions(max_pages)
        print('Записываем данные...')
        send_workbook_lists()
        print(c('Данные успешно записаны!').green)

    print('Время работы парсера', round(time.time() - start_time_script),
          'секунды')
def _collect_ad_links(page_content, css_class):
    """Return the ad links found in elements of ``css_class`` on a page.

    Links that have no further ``/`` after position 21 are left out.
    """
    links = []
    for element in page_content.find_all(class_=css_class):
        link_on_ad = HOST + element.find('a').get('href')
        if link_on_ad[21:].find('/') != -1:
            links.append(link_on_ad)
    return links


def send_ad_data_to_functions(max_pages: int) -> None:
    """
    Walk through the result pages, open every ad and pass it on for parsing.

    Pauses are inserted between requests: fixed short ones around page
    requests and, after a random number (1-5) of opened ads, a longer random
    one of about 8-10 seconds, so the request pattern looks less automated.
    Ads that were already parsed (present in the global ``parsed_ads``) are
    skipped. When the total number of ads is at most 50, only the first page
    is visited and parsing stops once that many ads were handled.

    If no ad links can be found on a page with either known CSS class, the
    page is dumped to ``new_tags_неудалять.html`` and the program exits.

    max_pages: int -> maximum number of pages with ads.
    """
    maximum_amount_of_open_links_without_pause: int = randint(1, 5)
    next_page: int = 1
    counter_parsed_link: int = 1

    # Sometimes ads are not enough for one page and Avito fills it with similar ads.
    if output_xlsx_file['Общее количество объявлений'] <= 50:
        ad_limit = output_xlsx_file['Общее количество объявлений']
    else:
        ad_limit = 'More than 50'

    while next_page != max_pages + 1:
        link_on_page_with_ads: url_type = get_response(URL).url

        # Avito does not allow you to navigate through the pages if a link with random letters is not specified
        # (this is how it looks like avito.ru/chita/krasota_i_zdorove/kupit-meditsinskie_izdeliya-ASgBAgICAUSEAqgJ),
        # this link can be obtained by request. This is the only way to go through the pages
        separator = '&' if '?' in link_on_page_with_ads else '?'
        link_to_navigate_through_pages: url_type = (
            link_on_page_with_ads + f'{separator}p={next_page}')

        time.sleep(2.2)
        page_with_ad: Response = get_response(link_to_navigate_through_pages)
        content_of_ad_page: BeautifulSoup = BeautifulSoup(
            page_with_ad.content, 'html.parser')

        links_on_ads: List[url_type] = _collect_ad_links(
            content_of_ad_page, 'item_table-description')
        if not links_on_ads:
            # Avito sometimes gives out the wrong data that it usually gives out (we are talking about classes).
            links_on_ads = _collect_ad_links(content_of_ad_page,
                                             'iva-item-body-NPl6W')
            if not links_on_ads:
                # Keep the unexpected page for inspection; the context
                # manager makes sure the dump is flushed and closed before
                # the process exits.
                with open('new_tags_неудалять.html', 'w') as dump:
                    dump.write(page_with_ad.text)
                print(
                    'Авито прислал невалидный сайт, перезапустите программу с теми параметрами, '
                    'на которых остановился парсер.')
                exit()

        time.sleep(3)
        for link in links_on_ads:
            if link in parsed_ads:
                continue

            # It is done to imitate a person, so that Avito does not consider the parser a bot.
            # If delete this code, Avito can give block by IP for a while.
            if maximum_amount_of_open_links_without_pause == 0:
                time.sleep(round(randint(8, 10) + random(), 2))
                maximum_amount_of_open_links_without_pause = randint(1, 5)
            maximum_amount_of_open_links_without_pause -= 1

            ad_page: Response = get_response(link)
            bs_ad_content: BeautifulSoup = BeautifulSoup(
                ad_page.content, 'html.parser')
            bypass_traps_avito(bs_ad_content, ad_page, link)
            parsed_ads.append(link)
            print(
                c(f'{link[12:]: <115} спарсено удачно.').magenta,
                f'Осталось {counter_parsed_link}/{output_xlsx_file["Общее количество объявлений"]}'
            )
            counter_parsed_link += 1

            # The limit is only counted down when it is a number, i.e. when
            # there are at most 50 ads in total.
            if isinstance(ad_limit, int):
                ad_limit -= 1
                if ad_limit == 0:
                    break

        # With a numeric limit everything fits on the first page.
        if isinstance(ad_limit, int):
            break

        print(f'{next_page} из {max_pages} спарсено.')
        next_page += 1

    set_average_price_of_all_ads()
    print(c('Парсинг завершен успешно!').green)
def _read_views_and_price(bs_ad_html):
    """Extract the view counters and the price from a parsed ad page.

    Returns a ``(views, price)`` pair where ``views`` is the whitespace-split
    text of the metadata element (first character dropped) and ``price`` is
    0 when the page has no price element.

    Raises AttributeError when the metadata element is missing.
    """
    views = bs_ad_html.find(
        class_='title-info-metadata-item').get_text()[1:].split()
    try:
        price = int(
            str(bs_ad_html.find('span', class_='js-item-price').text).replace(
                ' ', ''))
    except AttributeError:
        # No price element on the page.
        price = 0
    return views, price


def _store_ad_statistics(ad_page, views, price, link):
    """Hand the data of one ad over to the statistics collectors."""
    try:
        set_total_amount_views(views)
    except IndexError:
        # The view counters could not be read from ``views``; the remaining
        # data is still recorded.
        pass
    add_price_to_price_from_all_ads(price)
    set_data_about_ad(ad_page, views, price, link)


def bypass_traps_avito(bs_ad_html: BeautifulSoup, ad_page: Response,
                       link: url_type) -> None:
    """
    Read views and price from an ad page and record them, working around
    unexpected responses.

    When the expected metadata is missing from the page:
    - status 404: the ad is requested once more; if the metadata is still
      missing a notice is printed and the ad is skipped.
    - status 429: a notice is printed and the program exits.
    - any other status: the ad is skipped silently.

    An ad without a price element is recorded with price 0.

    bs_ad_html: BeautifulSoup -> object from BeautifulSoup from ad page.
    ad_page: Response -> response from ad page Avito.
    link: str -> link on ad page.
    """
    try:
        views_on_ad_page, price_of_product = _read_views_and_price(bs_ad_html)
    except AttributeError:
        if ad_page.status_code == 404:
            try:
                ad_page = get_response(link)
                bs_ad_html = BeautifulSoup(ad_page.content, 'html.parser')
                views_on_ad_page, price_of_product = _read_views_and_price(
                    bs_ad_html)
            except AttributeError:
                print(
                    c(''' Это исключение возможно лишь при том, если Avito отвечает не тем сайтом, который пар- сер ожидал увидеть. За остальной информацией обращайтесь к файлу help.txt. Если программа продолжает парсить, необращайте внимания. '''
                      ).yellow)
                return
        elif ad_page.status_code == 429:
            print(
                c(''' Ваш IP был заблокирован на время. Нужно подождать некоторое время, либо зайти на на сайт через браузер и ввести капчу. Если ничто из этого не помогло, следуйте указа- ниям в файле help.txt. '''
                  ).red)
            exit()
        else:
            return

    _store_ad_statistics(ad_page, views_on_ad_page, price_of_product, link)
from Modules import my_format as mf
from Modules_Ex import Ex115 as Ex
from termcolor2 import c

print(c(mf.tit('Sistema de Registo de Utilizadores')).white)

# Menu option -> action. Option '4' is not listed because it ends the loop.
acoes = {
    '1': Ex.registar_user,
    '2': Ex.ver_users,
    '3': Ex.delete_user,
}

while True:
    opc = Ex.menu()
    print('\n')
    if opc == '4':
        break
    acao = acoes.get(opc)
    if acao is not None:
        acao()
        print('\n\n')
def collect_testcases(
        files: typing.Iterable[pathlib.Path],
) -> typing.Dict[str, typing.Callable]:
    """
    Create a dict of all testcases found in the given files.

    Reads all files in order and finds all functions annotated with
    :func:`tbot.testcase`. Will print a warning if two testcases have the
    same name; such a name is then mapped to a stub that raises
    ``RuntimeError`` listing every file defining it.

    :param files: Iterator of files
    :returns: A mapping of names to testcases (functions)
    """

    def make_duplicate(
            testcase_name: str,
            paths: typing.List[pathlib.Path],
    ) -> typing.Callable:
        # A factory is used so every stub keeps its OWN name and file list.
        # Defining the stub directly inside the loop late-binds both the loop
        # variable and the stub itself, so an older stub would report the
        # name/files of whatever was processed last.
        def duplicate(*args: typing.Any, **kwargs: typing.Any) -> None:
            listing = "-/-"
            known = getattr(duplicate, "_tbot_files")
            if known is not None:
                listing = "\n > ".join(map(str, known))
            raise RuntimeError(
                c(f"The testcase {testcase_name!r} exists multiple times!"
                  ).yellow +
                """ Please tighten your testcase paths or remove/rename them so this conflict can be resolved. The testcase was defined in: > """
                + listing)

        setattr(duplicate, "_tbot_files", paths)
        return duplicate

    testcases: typing.Dict[str, typing.Callable] = {}

    for f in files:
        try:
            module = load_module(f)

            for func in module.__dict__.values():
                name = getattr(func, "_tbot_testcase", None)
                if name is None:
                    continue

                if name in testcases:
                    # If it already exists, check so we don't warn about the
                    # testcase being imported into another files global namespace
                    if testcases[name].__code__ is func.__code__:
                        continue

                    print(
                        c("Warning").yellow.bold +
                        f": Duplicate testcase {name!r}",
                        file=sys.stderr,
                    )
                    # Extend the list of defining files of the previous entry
                    # (which may itself already be a stub).
                    tbot_files = getattr(testcases[name],
                                         "_tbot_files").copy()
                    tbot_files.append(f)
                    testcases[name] = make_duplicate(name, tbot_files)
                else:
                    func._tbot_files = [f]
                    testcases[name] = func
        except:  # noqa: E722
            # Deliberately broad: a file that cannot be loaded must not stop
            # collection of the remaining files; report it and go on.
            import textwrap
            import traceback

            trace = textwrap.indent(traceback.format_exc(), " ")
            print(
                c("Warning").yellow.bold + f": Failed to load {f}:\n{trace}",
                file=sys.stderr,
            )

    return testcases
def runScriptDirectly(script):
    """Print a coloured banner showing ``script`` and run it in the shell.

    The command is handed to ``os.system`` unchanged; its exit status is
    ignored.
    """
    marker = c('>').blue + c('>').yellow
    banner = '\n\t' + marker + f' {script}\n'
    print(banner)
    os.system(script)
###### from termcolor import c
from termcolor2 import c  # termcolor2 + colorama

# Each sample is printed in its own foreground colour on a white background,
# underlined.
print(c('red').red.on_white.underline)
print(c('yellow').yellow.on_white.underline)
print(c('green').green.on_white.underline)
print(c('blue').cyan.on_white.underline)
print(c('dark blue').blue.on_white.underline)
print(c('purple').magenta.on_white.underline)

#######
from prettytable import PrettyTable
from colorama import Fore, Back, Style

table = PrettyTable()
table.field_names = ['colon1', 'colon2', 'colon3']
# The first row shows a cell coloured with raw colorama escape codes.
table.add_row(["user name", 10, Fore.RED + 'red' + Fore.WHITE])
table.add_row(["user1", 4, 'test1'])
table.add_row(["fsgj", 12, 'test6'])
table.add_row(["nbvc", 5, 'test3'])
# ``align`` is the PrettyTable attribute that controls column alignment; the
# previous spelling ``aling`` only created an unused attribute, so the
# right-alignment was never applied.
table.align = 'r'
table.sortby = 'colon3'
print(table)
# Read the optional template name, then classify every expected template
# variable as a single literal, an array or a range description.
# NOTE(review): this fragment relies on names defined outside of it
# (template_data, expected_vars, literals, arrays, vars_used, range_re) and
# appears to continue after its last line - confirm against the full function.
if 'name' in template_data:
    name = template_data['name']
# ``iteritems`` is the Python 2 dict iteration API.
for k, v in template_data['variables'].iteritems():
    if k in expected_vars:
        # read variables list
        try:
            # Interpret the declaration as a Python literal.
            # NOTE(review): only SyntaxError is handled below; confirm that
            # no other error from literal_eval is expected here.
            x = ast.literal_eval(v)
            if isinstance(x, list) and any(x):
                # The outer condition already requires ``any(x)``, so the
                # ``else`` of this inner test cannot be reached.
                if any(x):
                    if len(x) == 1:
                        # A one-element list is stored as a plain literal.
                        literals[k] = x[0]
                    else:
                        # Several values are stored as a bracketed,
                        # comma-joined string (the join requires the
                        # elements to be strings).
                        arrays[k] = "[%s]" % (",".join(x))
                else:
                    print(c("Ignoring empty declaration of %s" % v).magenta)
            else:
                # Anything else (including a list without truthy elements)
                # is wrapped in a one-element list.
                #literals[k] = x
                literals[k] = [x]
            vars_used.add(k)
        except SyntaxError:
            # Not a Python literal: check whether it is a range description.
            range_match = re.search(range_re, v)
            if range_match is None:
                # it's a string anyway
                literals[k] = v
                vars_used.add(k)
                continue
            seq_data = list(range_match.groups())
            # Group 5 holds the step; groups 0 and 2 are kept as the two
            # bounds of the sequence.
            step = seq_data[5]
            seq_data = [seq_data[0], seq_data[2]]
            if step is not None:
                # a step was captured by the pattern: keep it as well
                seq_data.append(step)
# if argv == '-s':
#     aSrvID = sys.argv[idx+1]
#     sys.argv = sys.argv[:idx]
#     break

# Every purely numeric command-line argument is treated as a job ID.
jobs_to_kill = [j for j in sys.argv[1:] if j.isdigit()]

if len(sys.argv) == 1:
    print("No joID given")
    sys.exit(1)

# NOTE(review): only the first argument is submitted for retry while the
# report below iterates over all numeric arguments - confirm what
# ``Client.retryJob`` accepts and returns.
jobs_to_retry = sys.argv[1]
s, o = Client.retryJob(jobs_to_retry, verbose=True)
# Function-call form so the script parses on Python 3 as well (the other
# print calls in this script already use it).
print(s)

print("Job retry results:\n=============================")
for i, job_id in enumerate(jobs_to_kill):
    succeeded = o[i]
    line = c("%s: %s" % (job_id, 'success' if succeeded else 'failed'))
    # Green for a successful retry, red otherwise.
    print(line.green if succeeded else line.red)