def __init__(self, **kwargs):
    """Set up the spider's ``start_urls`` from keyword arguments.

    Recognised keys in ``kwargs``:

    * ``symbols`` -- either a path to an existing file (handed to
      ``utils.load_symbols``) or a comma-separated string of symbols.
    * ``startdate`` / ``enddate`` -- passed to ``utils.check_date_arg``
      and then on to ``URLGenerator``; both default to ``''``.
    * ``limit`` -- passed to ``utils.parse_limit_arg``, which yields a
      ``(start, count)`` pair; defaults to ``''``.

    When ``symbols`` is missing or empty, ``start_urls`` is an empty list.
    """
    super(EdgarSpider, self).__init__(**kwargs)

    raw_symbols = kwargs.get('symbols')
    date_from = kwargs.get('startdate', '')
    date_to = kwargs.get('enddate', '')
    raw_limit = kwargs.get('limit', '')

    # The date and limit arguments are checked even when no symbols are
    # given, so a malformed argument is reported regardless.
    utils.check_date_arg(date_from, 'startdate')
    utils.check_date_arg(date_to, 'enddate')
    offset, amount = utils.parse_limit_arg(raw_limit)

    if not raw_symbols:
        self.start_urls = []
        return

    # An existing path means "read symbols from that file"; anything else
    # is treated as an inline comma-separated list.
    symbol_list = (
        utils.load_symbols(raw_symbols)
        if os.path.exists(raw_symbols)
        else raw_symbols.split(',')
    )
    self.start_urls = URLGenerator(symbol_list, date_from, date_to, offset, amount)

    # NOTE(review): this walks start_urls once at construction time just to
    # echo each URL; confirm URLGenerator can be iterated more than once.
    for url in self.start_urls:
        print(url)
def test_parse_limit_arg(self):
    # Well-formed inputs paired with the (start, count) tuple expected back.
    accepted = (
        ('', (0, None)),
        ('11,22', (11, 22)),
    )
    for raw, expected in accepted:
        self.assertEqual(utils.parse_limit_arg(raw), expected)

    # Too many fields, or a non-numeric field, must raise ValueError.
    for malformed in ('11,22,33', 'abc'):
        with self.assertRaises(ValueError):
            utils.parse_limit_arg(malformed)
def __init__(self, **kwargs):
    """Set up the spider's ``start_urls`` from keyword arguments.

    Recognised keys in ``kwargs``:

    * ``symbols`` -- either a path to an existing file (handed to
      ``utils.load_symbols``) or a comma-separated string of symbols.
    * ``startdate`` / ``enddate`` -- passed to ``utils.check_date_arg``
      and then on to ``URLGenerator``; both default to ``""``.
    * ``limit`` -- passed to ``utils.parse_limit_arg``, which yields a
      ``(start, count)`` pair; defaults to ``""``.

    When ``symbols`` is missing or empty, ``start_urls`` is an empty list.
    """
    super(EdgarSpider, self).__init__(**kwargs)

    raw_symbols = kwargs.get("symbols")
    date_from = kwargs.get("startdate", "")
    date_to = kwargs.get("enddate", "")
    raw_limit = kwargs.get("limit", "")

    # The date and limit arguments are checked even when no symbols are
    # given, so a malformed argument is reported regardless.
    utils.check_date_arg(date_from, "startdate")
    utils.check_date_arg(date_to, "enddate")
    offset, amount = utils.parse_limit_arg(raw_limit)

    if not raw_symbols:
        self.start_urls = []
        return

    # An existing path means "read symbols from that file"; anything else
    # is treated as an inline comma-separated list.
    symbol_list = (
        utils.load_symbols(raw_symbols)
        if os.path.exists(raw_symbols)
        else raw_symbols.split(",")
    )
    self.start_urls = URLGenerator(symbol_list, date_from, date_to, offset, amount)