コード例 #1
0
def convert_data_to_table_format():
    """Run the "transform" step on the scraped data file.

    Wraps ``SCRAPPED_FILE`` in a ``FileStorage``, hands that storage to a
    ``Parser`` and asks the parser to produce ``TABLE_FORMAT_FILE``.
    """
    logger.info("transform")

    # The gathered data (a JSON file) is turned into a pandas DataFrame and
    # saved as CSV by the parser.
    Parser(FileStorage(SCRAPPED_FILE)).parse(TABLE_FORMAT_FILE)
コード例 #2
0
class Scraper(object):
    """Scrape paginated result rows and hand them to a CSV maker.

    The configuration is read from ``config.yaml`` (relative to the current
    working directory) when the instance is created.
    """

    def __init__(self):
        self.__load_config()
        self.parser = Parser()
        self.csv_maker = CsvMaker()
        # Page currently being scraped; read by __query_dict() and by the
        # progress message in scrape_page().
        self.page_number = 1

    def scrape(self):
        """Scrape the configured pages and pass all collected rows to the CSV maker."""
        data = []
        # NOTE(review): range() excludes its upper bound, so pages
        # 1 .. config[PAGES] - 1 are fetched. Confirm whether PAGES is meant
        # as an exclusive limit or as a page count (in which case the last
        # page is skipped).
        for page_no in range(1, self.config[PAGES]):
            self.page_number = page_no
            # extend() appends in place instead of re-copying the whole
            # accumulated list for every page.
            data.extend(self.scrape_page())
        self.csv_maker.make(data)

    def scrape_page(self):
        """Fetch the current page and return the parser's result for its rows.

        The rows are all elements whose class is ``row0`` or ``row1``.
        """
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Scraping Page No: {}".format(self.page_number))
        resp = requests.get(self.__url_endpoint(), params=self.__query_dict())
        soup = BeautifulSoup(resp.text, 'html.parser')
        table = soup.findAll(True, {'class': ['row0', 'row1']})
        return self.parser.parse(table)

    def __load_config(self):
        """Load ``config.yaml`` into ``self.config``."""
        # The context manager closes the file handle deterministically.
        with open('config.yaml') as config_file:
            self.config = yaml.safe_load(config_file)

    def __url_endpoint(self):
        """Return the base URL taken from the configuration."""
        return self.config[BASE_URL]

    def __query_dict(self):
        """Build the query parameters for the current page.

        ``q`` is only included when the configuration contains ``COURSE``.
        """
        query_dict = {'pp': PER_PAGE_DATA, 'p': self.page_number}
        # `in` works on Python 2 and 3; dict.has_key() is gone in Python 3.
        if COURSE in self.config:
            query_dict['q'] = self.config[COURSE]
        print(query_dict)
        return query_dict
コード例 #3
0
def parse(city, country="France"):
    """Parse the page registered for *city* in *country*.

    The URL is looked up in the module-level ``toParse`` mapping
    (country -> city -> url) and handed to ``Parser.parse``.

    Args:
        city: Name of the city to look up.
        country: Name of the country the city belongs to.

    Returns:
        Whatever ``Parser.parse(url, city, country)`` returns, or ``None``
        when the country or the city is not supported (a message is written
        to stderr in that case).
    """
    cities = toParse.get(country)
    if not cities:
        print(f"No such country as {country} is supported", file=sys.stderr)
        # Stop here: continuing would call .get() on a missing entry and
        # fail with AttributeError instead of the message above.
        return None
    url = cities.get(city)
    if not url:
        print(f"No such city as {city} is supported", file=sys.stderr)
        # Do not hand a missing URL to the parser.
        return None
    return Parser.parse(url, city, country)
コード例 #4
0
def parse_and_write_bank_statement(
        parser: Parser,
        src_file: Path,
        dest_file: Path,
        rules_dir: Optional[Path],
        import_transaction: ImportTransactionProtocol,
        force: bool,
        dry_run: bool) -> bool:
    """Parse a bank statement and write it out as a ledger file.

    Args:
        parser: Object whose ``parse(rules_dir=...)`` yields the statement.
        src_file: Source file; only used for messages and for its suffix.
        dest_file: Target ledger file.
        rules_dir: Passed through to ``parser.parse``.
        import_transaction: Receives ``add_file`` for the destination and
            ``move_file_to_annex`` for the source.
        force: Overwrite an existing ``dest_file`` instead of skipping.
        dry_run: Print the ledger to stdout instead of writing ``dest_file``.

    Returns:
        ``True`` when the statement was processed, ``False`` when the import
        was skipped (destination exists without ``force``) or the parser
        raised ``NotImplementedError``.

    Raises:
        Exception: Anything raised while writing ``dest_file`` is re-raised
            after the partially written file has been removed.
    """
    already_imported = dest_file.exists()
    if already_imported and not force:
        print(f'WARNING: skipping import of already imported {src_file}',
              file=sys.stderr)
        return False
    if already_imported:
        print(f'WARNING: existing {dest_file} will be overwritten',
              file=sys.stderr)

    try:
        statement = parser.parse(rules_dir=rules_dir)
    except NotImplementedError as err:
        print(f"Warning: couldn't parse {src_file}:", err.args,
              file=sys.stderr)
        return False

    if dry_run:
        # Render into memory and show the result instead of touching disk.
        with io.StringIO() as buffer:
            statement.write_ledger(buffer)
            print(buffer.getvalue())
    else:
        try:
            with open(dest_file, 'w') as ledger:
                statement.write_ledger(ledger)
        except Exception as err:
            # Drop the hledger file so a later import starts clean once the
            # cause of the failure has been fixed.
            try:
                dest_file.unlink()
            except FileNotFoundError:
                pass
            raise err

    import_transaction.add_file(dest_file)
    # The source is moved next to the ledger file, keeping its own suffix.
    import_transaction.move_file_to_annex(
        src_file, dest_file.with_suffix(src_file.suffix))
    return True
コード例 #5
0
from generation.generators.frontend.shopping_cart_generator import ShoppingCartGenerator as SCGenerator
from generation.generators.frontend.auth_generator import AuthGenerator
from generation.generators.frontend.home_generator import HomeGenerator
from generation.generators.frontend.starter_generator import StarterGenerator
from generation.generators.frontend.profile_generator import ProfileGenerator
from generation.generators.frontend.product_generator import ProductGenerator
from generation.generators.frontend.item_generator import ItemGenerator
from generation.generators.frontend.category_generator import CategoryGenerator as CategoryGeneratorFront

# Script entry point: clear the previous output, parse the model and
# instantiate the generators.
if __name__ == '__main__':
    # Best-effort removal of the previous './output' directory; any error
    # (for example the directory not existing) is deliberately ignored.
    try:
        shutil.rmtree('./output')
    except Exception:
        pass
    parser = Parser()
    # NOTE(review): the arguments look like (metamodel directory, grammar
    # file, model file, flag) -- confirm against Parser.parse.
    model = parser.parse(os.path.join(root, "metamodel"), 'scala-angular.tx',
                         'project.scan', True)
    # Every generator below is constructed with the same MainGenerator
    # instance.
    main_generator = MainGenerator()
    model_generator = ModelGenerator(main_generator)
    table_generator = TableGenerator(main_generator)
    repository_generator = RepositoryGenerator(main_generator)
    service_generator = ServiceGenerator(main_generator)
    controller_generator = ControllerGenerator(main_generator)
    dto_generator = DTOGenerator(main_generator)
    jwt_generator = JWTGenerator(main_generator)
    module_generator = ModuleGenerator(main_generator)
    conf_generator = ConfGenerator(main_generator)
    sbt_generator = SbtGenerator(main_generator)
    category_generator = CategoryGenerator(main_generator)
    order_generator = OrderGenerator(main_generator)
    order_item_generator = OrderItemGenerator(main_generator)
    shopping_cart_generator = ShoppingCartGenerator(main_generator)
コード例 #6
0
def parseAll():
    """Lazily yield ``Parser.parse(url, city, country)`` for every entry.

    Walks the module-level ``toParse`` mapping (country -> city -> url) in
    iteration order; each parse call happens only when its result is
    requested.
    """
    yield from (
        Parser.parse(address, town, nation)
        for nation, towns in toParse.items()
        for town, address in towns.items()
    )