def convert_data_to_table_format():
    """Turn the scraped JSON data into the tabular CSV file.

    Wraps ``SCRAPPED_FILE`` in a ``FileStorage``, hands that storage to a
    ``Parser`` and asks the parser to write its result to
    ``TABLE_FORMAT_FILE``. Returns nothing.
    """
    logger.info("transform")
    # The parser converts the gathered JSON records to a pandas DataFrame
    # and saves that frame as CSV at the target path.
    Parser(FileStorage(SCRAPPED_FILE)).parse(TABLE_FORMAT_FILE)
class Scraper(object):
    """Fetch result pages from a configured endpoint and export the rows.

    The configuration is read from ``config.yaml`` in the current working
    directory. Each page is requested with the per-page size, the page
    number and (when configured) a search term; the matching HTML elements
    are passed to ``Parser`` and the combined result is handed to
    ``CsvMaker``.
    """

    def __init__(self):
        self.__load_config()
        self.parser = Parser()
        self.csv_maker = CsvMaker()
        # Page currently being scraped; read by the query builder.
        self.page_number = 1

    def scrape(self):
        """Scrape every configured page and pass all rows to the CSV maker."""
        data = []
        # NOTE(review): range() excludes its stop value, so the page numbered
        # config[PAGES] itself is never requested -- confirm this is intended.
        for page_no in range(1, self.config[PAGES]):
            self.page_number = page_no
            # extend() grows the list in place instead of copying it per page.
            data.extend(self.scrape_page())
        self.csv_maker.make(data)

    def scrape_page(self):
        """Download the current page and return its parsed rows."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Scraping Page No: {}".format(self.page_number))
        resp = requests.get(self.__url_endpoint(), self.__query_dict())
        soup = BeautifulSoup(resp.text, 'html.parser')
        # Select elements of any tag whose class is 'row0' or 'row1'.
        table = soup.findAll(True, {'class': ['row0', 'row1']})
        return self.parser.parse(table)

    def __load_config(self):
        """Load ``config.yaml`` into ``self.config``."""
        # The context manager closes the file handle deterministically.
        with open('config.yaml') as config_file:
            self.config = yaml.safe_load(config_file)

    def __url_endpoint(self):
        """Return the base URL taken from the configuration."""
        return self.config[BASE_URL]

    def __query_dict(self):
        """Build the query parameters for the current page."""
        query_dict = {'pp': PER_PAGE_DATA, 'p': self.page_number}
        # ``in`` replaces dict.has_key(), which does not exist on Python 3.
        if COURSE in self.config:
            query_dict['q'] = self.config[COURSE]
        print(query_dict)
        return query_dict
def parse(city, country="France"):
    """Parse the source registered for ``city`` in ``country``.

    The URL is looked up in the module-level ``toParse`` mapping, first by
    country and then by city.

    Args:
        city: Key of the city inside the country's entry.
        country: Key of the country in ``toParse``; defaults to "France".

    Returns:
        The result of ``Parser.parse(url, city, country)``, or ``None`` when
        the country or the city is not supported (a message is written to
        stderr in that case).
    """
    cities = toParse.get(country)
    if not cities:
        print(f"No such country as {country} is supported", file=sys.stderr)
        # Stop here: there is no city table to look the city up in.
        return None

    url = cities.get(city)
    if not url:
        print(f"No such city as {city} is supported", file=sys.stderr)
        # Stop here: never hand a missing URL to the parser.
        return None

    return Parser.parse(url, city, country)
def parse_and_write_bank_statement(
        parser: Parser,
        src_file: Path,
        dest_file: Path,
        rules_dir: Optional[Path],
        import_transaction: ImportTransactionProtocol,
        force: bool,
        dry_run: bool) -> bool:
    """Parse one bank statement and write it out as a ledger file.

    Args:
        parser: Parser whose ``parse(rules_dir=...)`` yields the statement.
        src_file: Original statement file; only used for messages and for
            deriving the name it is moved to.
        dest_file: Ledger file to create.
        rules_dir: Passed through to ``parser.parse``.
        import_transaction: Receives ``dest_file`` via ``add_file`` and the
            source via ``move_file_to_annex``.
        force: Overwrite ``dest_file`` when it already exists.
        dry_run: Print the ledger to stdout instead of writing ``dest_file``.

    Returns:
        ``True`` when the statement was processed, ``False`` when it was
        skipped (destination exists without ``force``) or the parser raised
        ``NotImplementedError``.

    Raises:
        Exception: Whatever writing the ledger raised; a partially written
            ``dest_file`` is deleted before re-raising.
    """
    if dest_file.exists():
        if not force:
            print(f'WARNING: skipping import of already imported {src_file}',
                  file=sys.stderr)
            return False
        print(f'WARNING: existing {dest_file} will be overwritten',
              file=sys.stderr)

    try:
        statement = parser.parse(rules_dir=rules_dir)
    except NotImplementedError as err:
        print(f'Warning: couldn\'t parse {src_file}:', err.args,
              file=sys.stderr)
        return False

    if dry_run:
        # Render into memory and show the result rather than touching disk.
        with io.StringIO() as buffer:
            statement.write_ledger(buffer)
            print(buffer.getvalue())
    else:
        try:
            with open(dest_file, 'w') as ledger:
                statement.write_ledger(ledger)
        except Exception as err:
            # Drop the half-written ledger so a later run is not skipped as
            # "already imported" once the underlying problem is fixed.
            try:
                dest_file.unlink()
            except FileNotFoundError:
                pass
            raise err

    import_transaction.add_file(dest_file)
    # The source keeps its own extension but takes the ledger's name.
    annexed_src = dest_file.with_suffix(src_file.suffix)
    import_transaction.move_file_to_annex(src_file, annexed_src)
    return True
from generation.generators.frontend.shopping_cart_generator import ShoppingCartGenerator as SCGenerator
from generation.generators.frontend.auth_generator import AuthGenerator
from generation.generators.frontend.home_generator import HomeGenerator
from generation.generators.frontend.starter_generator import StarterGenerator
from generation.generators.frontend.profile_generator import ProfileGenerator
from generation.generators.frontend.product_generator import ProductGenerator
from generation.generators.frontend.item_generator import ItemGenerator
from generation.generators.frontend.category_generator import CategoryGenerator as CategoryGeneratorFront

if __name__ == '__main__':
    # Start from a clean slate: remove output left over from a previous run.
    # ignore_errors keeps this best-effort (a missing directory is fine)
    # without a blanket ``except Exception: pass``.
    shutil.rmtree('./output', ignore_errors=True)

    # Load the model from the metamodel directory.
    # NOTE(review): the meaning of the trailing ``True`` flag is not visible
    # here -- confirm against Parser.parse.
    parser = Parser()
    model = parser.parse(os.path.join(root, "metamodel"), 'scala-angular.tx', 'project.scan', True)

    # Every generator below is constructed with the same MainGenerator.
    main_generator = MainGenerator()
    model_generator = ModelGenerator(main_generator)
    table_generator = TableGenerator(main_generator)
    repository_generator = RepositoryGenerator(main_generator)
    service_generator = ServiceGenerator(main_generator)
    controller_generator = ControllerGenerator(main_generator)
    dto_generator = DTOGenerator(main_generator)
    jwt_generator = JWTGenerator(main_generator)
    module_generator = ModuleGenerator(main_generator)
    conf_generator = ConfGenerator(main_generator)
    sbt_generator = SbtGenerator(main_generator)
    category_generator = CategoryGenerator(main_generator)
    order_generator = OrderGenerator(main_generator)
    order_item_generator = OrderItemGenerator(main_generator)
    shopping_cart_generator = ShoppingCartGenerator(main_generator)
def parseAll():
    """Lazily parse every city registered in ``toParse``.

    ``toParse`` is walked country by country and, within each country, city
    by city.

    Yields:
        The result of ``Parser.parse(url, city, country)`` for each entry,
        produced one at a time as the generator is consumed.
    """
    yield from (
        Parser.parse(url, city, country)
        for country, cities in toParse.items()
        for city, url in cities.items()
    )