def __init__(self, web_request, web_object_factory, data_handler):
    """Store collaborators and initialise empty filter/result buffers.

    :param web_request: object used to fetch raw HTML pages.
    :param web_object_factory: factory that builds web data objects.
    :param data_handler: handler used for saving/loading objects.
    """
    # BUG FIX: the original called ``super(OptionFilter).__init__()``, which
    # initialises the unbound super proxy object itself and never runs the
    # parent class' __init__.  ``super().__init__()`` performs the intended
    # cooperative call in Python 3.
    super().__init__()
    self.web_request = web_request
    self.web_object_factory = web_object_factory
    self.data_handler = data_handler
    self.filtered_data = []                     # tags filtered from the primary fetch
    self.filtered_data_keywords = []            # [tag, class-id, name] triples
    self.filtered_recursive_data = []           # tags filtered from recursive fetches
    self.filtered_recursive_data_keywords = []  # triples for recursive data
    self.web_data_objects = []                  # consolidated result objects
    self.view = ConsoleView()
def __init__(self):
    """Initialise url state, the requests client, and fetch buffers."""
    # BUG FIX: ``super(OptionFilter).__init__()`` initialises the unbound
    # super proxy itself and never runs the parent initialiser;
    # ``super().__init__()`` is the correct Python 3 form.
    super().__init__()
    self.url = ''
    self.url_padding = ''       # prefix joined onto relative recursive urls
    self.recursive_urls = []
    self.requests = requests    # requests module held as an attribute
    self.request_data = None    # body text of the last primary fetch
    self.requests_status_code = None
    self.recursive_request_data = []
    self.recursive_request_data_count = 0
    self.view = ConsoleView()
def __init__(self):
    """Create the view/factory collaborators and load the saved-paths list."""
    self.view = ConsoleView()
    self.web_object_factory = WebObjectFactory()
    # Load last: init_save_list() reports its progress through self.view.
    self.save_list = self.init_save_list()
class DataHandler(object):
    """Persists scraped web objects to disk and tracks known save locations.

    The list of known save paths is itself pickled to ``savelist.pickle``
    in the current working directory.
    """

    def __init__(self):
        self.web_object_factory = WebObjectFactory()
        self.view = ConsoleView()
        self.save_list = self.init_save_list()

    def init_save_list(self):
        """Load the saved-paths list from disk, creating the file if missing.

        :return: list of absolute paths previously registered.
        """
        s_list = []
        try:
            with open('savelist.pickle', 'rb') as input_file:
                s_list = pickle.load(input_file)
        except (EOFError, FileNotFoundError):
            self.view.display_item('No save list found, creating new save list.....')
            with open('savelist.pickle', 'wb') as output_file:
                pickle.dump(s_list, output_file)
        return s_list

    def save_save_list(self):
        """Write the current save list back to ``savelist.pickle``."""
        try:
            with open('savelist.pickle', 'wb') as output_file:
                pickle.dump(self.save_list, output_file)
        except (EOFError, FileNotFoundError):
            self.view.display_item('Error saving save list.....')

    def display_save_list(self):
        """Show every registered save location on the console view."""
        self.view.display_item('displaying file save locations.....')
        self.view.display_items(self.save_list)

    def add_save_list(self, save_location):
        """Register *save_location* (normalised to absolute) if not known."""
        save_location = abspath(save_location)
        if save_location not in self.save_list:
            self.save_list.append(save_location)
            self.save_save_list()

    def remove_save_list(self, save_location):
        """Remove *save_location* from the list and persist the change.

        :raises ValueError: if the path is not in the list (list.remove).
        """
        save_location = abspath(save_location)
        self.save_list.remove(save_location)
        self.save_save_list()

    def save_objects(self, web_objs, path):
        """Pickle the plain attribute dicts of *web_objs* to *path*.

        Only non-dunder instance attributes are persisted.
        """
        try:
            # Deeply nested data can exceed the default recursion limit
            # while pickling, hence the (very generous) raise here.
            sys.setrecursionlimit(1000000)
            objs = []
            for obj in web_objs:
                # BUG FIX: the original named this dict ``dict``, shadowing
                # the builtin; also filter instead of copy-then-delete.
                attrs = {key: value for key, value in obj.__dict__.items()
                         if not (key.startswith('__') and key.endswith('__'))}
                objs.append(attrs)
            # BUG FIX: the original left the output handle open; the context
            # manager closes it even if pickling raises.
            with open(path, 'wb') as output_file:
                pickle.dump(objs, output_file)
            self.add_save_list(path)
        except FileNotFoundError:
            self.view.display_item('Error saving file.....')

    def remove_objects(self, path):
        """Delete the save file at *path* and deregister it."""
        try:
            self.view.display_item('removing file ' + path + '.....')
            remove(path)
            self.remove_save_list(path)
        except FileNotFoundError:
            self.view.display_item('File ' + path + ' not found.....')

    def load_objects(self, path):
        """Unpickle attribute dicts from *path* and rebuild product objects.

        NOTE(security): ``pickle.load`` can execute arbitrary code from the
        file; only load files this application wrote itself.

        :return: list of rebuilt web objects (empty if the file is missing).
        """
        loaded_objs = []
        try:
            with open(path, 'rb') as input_file:
                web_objs = pickle.load(input_file)
            for obj in web_objs:
                loaded_objs.append(
                    self.web_object_factory.build_object('product', obj))
        except FileNotFoundError:
            self.view.display_item('File ' + path + ' not found.....')
        return loaded_objs
def __init__(self, web_data):
    """Store the web-data provider and initialise graphing state.

    :param web_data: object exposing ``get_data()`` returning web objects.
    """
    # BUG FIX: ``super(OptionFilter).__init__()`` initialises the unbound
    # super proxy itself and never runs the parent initialiser.
    super().__init__()
    self.web_data = web_data
    self.data = {}           # attribute value -> occurrence count
    self.graph_type = None   # not used by the visible code yet
    self.view = ConsoleView()
class GraphCreator(OptionFilter):
    """Tallies attribute values of scraped web objects and pie-charts them."""

    def __init__(self, web_data):
        """:param web_data: provider exposing ``get_data()``."""
        # BUG FIX: ``super(OptionFilter).__init__()`` initialises the unbound
        # super proxy itself and never runs the parent initialiser.
        super().__init__()
        self.web_data = web_data
        self.data = {}           # attribute value -> occurrence count
        self.graph_type = None   # not used by the visible code yet
        self.view = ConsoleView()

    def handle_command(self, args):
        """Dispatch *args* against the graph-creator command table."""
        return self.command(args, graph_creator_options)

    def display_graph(self, *args):
        """Render ``self.data`` as a pie chart titled ``args[PARAMETER_ONE]``.

        The largest slice is "exploded" slightly for emphasis.
        """
        labels = []
        sizes = []
        colors = []
        explode = []
        color_count = len(graph_colors) - 1
        count = 0
        largest_value = 0
        for key, value in self.data.items():
            # Track the biggest slice (original used an awkward conditional
            # expression equivalent to max()).
            largest_value = max(largest_value, value)
            labels.append(key)
            sizes.append(value)
            colors.append(graph_colors[count])
            explode.append(0.0)
            # Cycle through the palette when there are more slices than colors.
            count = (count + 1) if count < color_count else 0
        if len(self.data) > 0:
            explode[sizes.index(largest_value)] = 0.1
        plt.title(args[self.PARAMETER_ONE])
        plt.pie(sizes, explode=explode, labels=labels, colors=colors,
                autopct='%1.1f%%', shadow=True, startangle=90)
        plt.axis('equal')
        self.view.display_item('displaying graph.....')
        plt.show()

    def graph_data(self, *args):
        """Tally values of attribute ``args[PARAMETER_ONE]`` across web data."""
        self.data.clear()
        web_data = self.web_data.get_data()
        attr_name = args[self.PARAMETER_ONE]
        self.view.display_item('gathering data.....')
        for wd in web_data:
            try:
                wd_attr = getattr(wd, attr_name)
                # Exact-type dispatch is deliberate: a datetime (subclass of
                # date) must still be counted via str_data, as the original did.
                if type(wd_attr) is decimal.Decimal:
                    self.currency_data(wd_attr)
                elif type(wd_attr) is date:
                    self.date_data(wd_attr)
                else:
                    self.str_data(wd_attr)
            except (AttributeError, UnboundLocalError):
                self.view.display_item(
                    'Error, WebObject contains no attribute '
                    + attr_name + '.....')

    def currency_data(self, value):
        # Placeholder: currency bucketing is not implemented yet.
        self.view.display_item('currency')

    def date_data(self, value):
        # Placeholder: date bucketing is not implemented yet.
        pass

    def str_data(self, value):
        """Increment the occurrence count for *value* (idiomatic dict.get)."""
        self.data[value] = self.data.get(value, 0) + 1

    def display_graph_data(self, *args):
        """Print the raw value -> count tally to stdout."""
        for key, value in self.data.items():
            print(key + ': ' + str(value))
class GraphCreator(OptionFilter):
    """Tallies attribute values of scraped web objects and pie-charts them."""

    def __init__(self, web_data):
        """:param web_data: provider exposing ``get_data()``."""
        # BUG FIX: ``super(OptionFilter).__init__()`` initialises the unbound
        # super proxy itself and never runs the parent initialiser.
        super().__init__()
        self.web_data = web_data
        self.data = {}           # attribute value -> occurrence count
        self.graph_type = None   # not used by the visible code yet
        self.view = ConsoleView()

    def handle_command(self, args):
        """Dispatch *args* against the graph-creator command table."""
        return self.command(args, graph_creator_options)

    def display_graph(self, *args):
        """Render ``self.data`` as a pie chart titled ``args[PARAMETER_ONE]``.

        The largest slice is "exploded" slightly for emphasis.
        """
        labels = []
        sizes = []
        colors = []
        explode = []
        color_count = len(graph_colors) - 1
        count = 0
        largest_value = 0
        for key, value in self.data.items():
            # Track the biggest slice (original used an awkward conditional
            # expression equivalent to max()).
            largest_value = max(largest_value, value)
            labels.append(key)
            sizes.append(value)
            colors.append(graph_colors[count])
            explode.append(0.0)
            # Cycle through the palette when there are more slices than colors.
            count = (count + 1) if count < color_count else 0
        if len(self.data) > 0:
            explode[sizes.index(largest_value)] = 0.1
        plt.title(args[self.PARAMETER_ONE])
        plt.pie(sizes, explode=explode, labels=labels, colors=colors,
                autopct='%1.1f%%', shadow=True, startangle=90)
        plt.axis('equal')
        self.view.display_item('displaying graph.....')
        plt.show()

    def graph_data(self, *args):
        """Tally values of attribute ``args[PARAMETER_ONE]`` across web data."""
        self.data.clear()
        web_data = self.web_data.get_data()
        attr_name = args[self.PARAMETER_ONE]
        self.view.display_item('gathering data.....')
        for wd in web_data:
            try:
                wd_attr = getattr(wd, attr_name)
                # Exact-type dispatch is deliberate: a datetime (subclass of
                # date) must still be counted via str_data, as the original did.
                if type(wd_attr) is decimal.Decimal:
                    self.currency_data(wd_attr)
                elif type(wd_attr) is date:
                    self.date_data(wd_attr)
                else:
                    self.str_data(wd_attr)
            except (AttributeError, UnboundLocalError):
                self.view.display_item(
                    'Error, WebObject contains no attribute '
                    + attr_name + '.....')

    def currency_data(self, value):
        # Placeholder: currency bucketing is not implemented yet.
        self.view.display_item('currency')

    def date_data(self, value):
        # Placeholder: date bucketing is not implemented yet.
        pass

    def str_data(self, value):
        """Increment the occurrence count for *value* (idiomatic dict.get)."""
        self.data[value] = self.data.get(value, 0) + 1

    def display_graph_data(self, *args):
        """Print the raw value -> count tally to stdout."""
        for key, value in self.data.items():
            print(key + ': ' + str(value))
class WebData(OptionFilter):
    """Filters fetched HTML into attribute dicts and consolidated objects."""

    # Indices into a [tag, class-id, class-id-name] keyword triple.
    TAG_TYPE = 0
    CLASS_ID = 1
    CLASS_ID_NAME = 2
    CONSOLIDATE_DATA_PARAM_COUNT = 2
    CONSOLIDATE_ERROR_MSG = 'Error consolidating data, please try again...'

    def __init__(self, web_request, web_object_factory, data_handler):
        """Store collaborators and initialise empty filter/result buffers."""
        # BUG FIX: ``super(OptionFilter).__init__()`` initialises the unbound
        # super proxy itself and never runs the parent initialiser.
        super().__init__()
        self.web_request = web_request
        self.web_object_factory = web_object_factory
        self.data_handler = data_handler
        self.filtered_data = []
        self.filtered_data_keywords = []
        self.filtered_recursive_data = []
        self.filtered_recursive_data_keywords = []
        self.web_data_objects = []
        self.view = ConsoleView()

    def handle_command(self, args):
        """Dispatch *args* against the web-data command table."""
        return self.command(args, web_data_options)

    def get_data(self):
        """Return the consolidated web data objects."""
        return self.web_data_objects

    def clear_filtered_data(self, *args):
        """Empty every filter buffer and the object list in place."""
        self.view.display_item('clearing filtered data.....')
        del self.filtered_data[:]
        del self.filtered_data_keywords[:]
        del self.filtered_recursive_data[:]
        del self.filtered_recursive_data_keywords[:]
        del self.web_data_objects[:]

    def print_data(self, *args):
        """Show the attribute selected by the command option, if any."""
        attr = self.method_options(args[self.COMMAND_OPTION],
                                   web_data_print_options)
        if attr is not None:
            if not isinstance(attr, list):
                self.view.display_item(args[self.COMMAND_OPTION] + ': '
                                       + str(attr))
            else:
                self.view.display_items(attr)

    def print_web_data_object(self, *args):
        """Display every consolidated object between divider rules."""
        self.view.display_item('displaying web data objects.....')
        self.view.display_item('------------------------------------'
                               '----------------------------')
        for wo in self.web_data_objects:
            wo.func_display_data(wo, self.view)
        self.view.display_item('------------------------------------'
                               '----------------------------')

    def display_saves(self, *args):
        self.data_handler.display_save_list()

    def load_saved_data(self, *args):
        """Replace the object list with objects loaded from a save file."""
        self.view.display_item('loading saved data.....')
        self.web_data_objects = self.data_handler.load_objects(
            args[self.PARAMETER_ONE])

    def save_data(self, *args):
        """Persist the current object list to the given path."""
        self.view.display_item('saving data to disk.....')
        self.data_handler.save_objects(self.web_data_objects,
                                       args[self.PARAMETER_ONE])

    def remove_data(self, *args):
        self.data_handler.remove_objects(args[self.PARAMETER_ONE])

    def get_request_data(self, *args):
        """Filter the last fetched page by a [tag, attr, value] triple."""
        try:
            data_options = self.check_second_level_args(args)[
                self.COMMAND_OPTION]
            data = self.web_request.get_request_data()
            req_data = BeautifulSoup(data, 'html.parser') \
                .findAll(data_options[self.TAG_TYPE],
                         attrs={data_options[self.CLASS_ID]:
                                data_options[self.CLASS_ID_NAME]})
            for data in req_data:
                self.filtered_data.append(data)
                self.view.display_item('filtering data.....')
        except TypeError:
            self.view.display_item(self.COMMAND_ERROR_MSG)
            return

    def get_recursive_request_data(self, *args):
        """Filter each recursively fetched page down to one matching tag."""
        try:
            data_options = self.check_second_level_args(args)[
                self.COMMAND_OPTION]
            for data in self.web_request.get_recursive_request_data():
                self.view.display_item('filtering recursive data.....')
                rec_data = BeautifulSoup(data, 'html.parser') \
                    .find(data_options[self.TAG_TYPE],
                          attrs={data_options[self.CLASS_ID]:
                                 data_options[self.CLASS_ID_NAME]})
                self.filtered_recursive_data.append(rec_data)
        except TypeError:
            self.view.display_item(self.COMMAND_ERROR_MSG)
            return

    def filter_urls(self, *args):
        """Extract hrefs from the filtered data and queue them for fetching."""
        try:
            data_options = self.check_second_level_args(args)[
                self.COMMAND_OPTION]
            self.view.display_item('filtering urls.....')
            for data in self.filtered_data:
                tag_depth = self.check_data_int(data_options[self.CLASS_ID])
                if tag_depth is not None:
                    # Numeric second field selects the nth matching tag.
                    url = data.find_all(data_options[self.TAG_TYPE])
                    self.web_request.add_recursive_url(url[tag_depth]['href'])
                else:
                    url = data.find(data_options[self.TAG_TYPE],
                                    attrs={data_options[self.CLASS_ID]:
                                           data_options[self.CLASS_ID_NAME]})
                    self.web_request.add_recursive_url(url['href'])
        except (TypeError, KeyError, IndexError):
            self.view.display_item(self.COMMAND_ERROR_MSG)
            return

    def set_data_keywords(self, *args):
        """Register [tag, class-id, name] triples for primary filtering."""
        kw_pairs = self.check_second_level_args(args)
        if kw_pairs is not None:
            for kw_pair in kw_pairs:
                keywords = [kw_pair[0], kw_pair[1], kw_pair[2]]
                self.view.display_item('adding tag, class pair: '
                                       + str(keywords))
                self.filtered_data_keywords.append(keywords)

    def set_recursive_data_keywords(self, *args):
        """Register [tag, class-id, name] triples for recursive filtering."""
        kw_pairs = self.check_second_level_args(args)
        if kw_pairs is not None:
            for kw_pair in kw_pairs:
                r_keywords = [kw_pair[0], kw_pair[1], kw_pair[2]]
                self.view.display_item('adding tag, class pair: '
                                       + str(r_keywords))
                self.filtered_recursive_data_keywords.append(r_keywords)

    def consolidate_data(self, *args):
        """Combine primary and recursive filtered data into web objects."""
        params = self.check_second_level_args(args)
        if params is not None and self.check_second_level_param_count(
                params, self.CONSOLIDATE_DATA_PARAM_COUNT):
            func_one = self.method_options(
                params[self.PARAMETER_ONE][self.PARAMETER_ONE],
                web_data_consolidate_options)
            func_two = self.method_options(
                params[self.PARAMETER_TWO][self.PARAMETER_ONE],
                web_data_consolidate_options)
            try:
                attr_one = func_one(
                    self.filtered_data, self.filtered_data_keywords,
                    params[self.PARAMETER_ONE][self.PARAMETER_TWO],
                    params[self.PARAMETER_ONE][self.PARAMETER_THREE])
                attr_two = func_two(
                    self.filtered_recursive_data,
                    self.filtered_recursive_data_keywords,
                    params[self.PARAMETER_TWO][self.PARAMETER_TWO],
                    params[self.PARAMETER_TWO][self.PARAMETER_THREE])
                self.create_web_data_object(attr_one, attr_two)
            except TypeError:
                self.view.display_item(self.CONSOLIDATE_ERROR_MSG)

    def filter_by_children(self, *args):
        """Build attribute dicts from <span>(name)/<div>(value) child pairs."""
        obj_attrs = []
        data = args[0]
        for d in data:
            names = OrderedSet()
            attrs = {}
            try:
                for dc in d.find_all('div'):
                    name = dc.find('span')
                    value = dc.find('div')
                    # NOTE(review): ``value`` is truthiness-tested while
                    # ``name`` is compared to None — preserved as written.
                    if value and name is not None:
                        if name.text not in names:
                            names.add(name.text)
                            attrs[name.text] = value.text
                obj_attrs.append(attrs)
            except AttributeError:
                self.view.display_item('Error filtering data '
                                       'from children.....')
        return self.sanitise_attributes(obj_attrs)

    def filter_by_keywords(self, *args):
        """Build attribute dicts by applying keyword triples to each tag."""
        data = args[self.PARAMETER_ONE]
        data_kw = args[self.PARAMETER_TWO]
        obj_attr = []
        for d in data:
            try:
                attrs = {}
                for kw_pair in data_kw:
                    tag_depth = self.check_data_int(
                        kw_pair[self.PARAMETER_TWO])
                    if tag_depth is not None:
                        # Numeric second field selects the nth matching tag.
                        value = d.find_all(kw_pair[self.PARAMETER_ONE])
                        value = value[tag_depth].string
                    else:
                        value = d.find(
                            kw_pair[self.PARAMETER_ONE],
                            {kw_pair[self.PARAMETER_TWO]:
                             kw_pair[self.PARAMETER_THREE]}).string
                    # Shared tail hoisted out of both branches (was duplicated).
                    value = self.check_data_type(value) if value else 'unknown'
                    attrs[kw_pair[self.PARAMETER_THREE]] = value
                obj_attr.append(attrs)
            except (TypeError, KeyError, IndexError):
                self.view.display_item(self.CONSOLIDATE_ERROR_MSG)
        return obj_attr

    def sanitise_attributes(self, obj_attrs):
        """Strip whitespace and echoed labels from raw attribute dicts."""
        sanitised_obj_attrs = []
        # BUG FIX: the original loop variable shadowed the builtin ``dict``.
        for raw_attrs in obj_attrs:
            attrs = {}
            for key, value in raw_attrs.items():
                # Pages sometimes repeat the label inside the value (and
                # vice versa); strip the echoes before normalising.
                value = value.replace(key, '')
                key = key.replace(value, '')
                sanitized_key = key.replace('\n', '').replace(' ', '').lower()
                sanitized_value = re.sub('[ ]+', ' ',
                                         value.replace('\n', '')).strip()
                attrs[sanitized_key] = self.check_data_type(sanitized_value)
            sanitised_obj_attrs.append(attrs)
        return sanitised_obj_attrs

    def create_web_data_object(self, attr_one, attr_two, obj_name='product'):
        """Merge paired attribute dicts and build one object per entry."""
        if len(attr_two) > 0:
            # CLARITY FIX: the original reused the parameter names as the
            # loop variables, shadowing them mid-iteration.
            for primary, secondary in zip(attr_one, attr_two):
                self.view.display_item('creating object.....')
                primary.update(secondary)
                self.web_data_objects.append(
                    self.web_object_factory.build_object(obj_name, primary))
        else:
            for primary in attr_one:
                self.view.display_item('creating object.....')
                self.web_data_objects.append(
                    self.web_object_factory.build_object(obj_name, primary))
class WebData(OptionFilter):
    """Filters fetched HTML into attribute dicts and consolidated objects."""

    # Indices into a [tag, class-id, class-id-name] keyword triple.
    TAG_TYPE = 0
    CLASS_ID = 1
    CLASS_ID_NAME = 2
    CONSOLIDATE_DATA_PARAM_COUNT = 2
    CONSOLIDATE_ERROR_MSG = 'Error consolidating data, please try again...'

    def __init__(self, web_request, web_object_factory, data_handler):
        """Store collaborators and initialise empty filter/result buffers."""
        # BUG FIX: ``super(OptionFilter).__init__()`` initialises the unbound
        # super proxy itself and never runs the parent initialiser.
        super().__init__()
        self.web_request = web_request
        self.web_object_factory = web_object_factory
        self.data_handler = data_handler
        self.filtered_data = []
        self.filtered_data_keywords = []
        self.filtered_recursive_data = []
        self.filtered_recursive_data_keywords = []
        self.web_data_objects = []
        self.view = ConsoleView()

    def handle_command(self, args):
        """Dispatch *args* against the web-data command table."""
        return self.command(args, web_data_options)

    def get_data(self):
        """Return the consolidated web data objects."""
        return self.web_data_objects

    def clear_filtered_data(self, *args):
        """Empty every filter buffer and the object list in place."""
        self.view.display_item('clearing filtered data.....')
        del self.filtered_data[:]
        del self.filtered_data_keywords[:]
        del self.filtered_recursive_data[:]
        del self.filtered_recursive_data_keywords[:]
        del self.web_data_objects[:]

    def print_data(self, *args):
        """Show the attribute selected by the command option, if any."""
        attr = self.method_options(args[self.COMMAND_OPTION],
                                   web_data_print_options)
        if attr is not None:
            if not isinstance(attr, list):
                self.view.display_item(args[self.COMMAND_OPTION] + ': '
                                       + str(attr))
            else:
                self.view.display_items(attr)

    def print_web_data_object(self, *args):
        """Display every consolidated object between divider rules."""
        self.view.display_item('displaying web data objects.....')
        self.view.display_item('------------------------------------'
                               '----------------------------')
        for wo in self.web_data_objects:
            wo.func_display_data(wo, self.view)
        self.view.display_item('------------------------------------'
                               '----------------------------')

    def display_saves(self, *args):
        self.data_handler.display_save_list()

    def load_saved_data(self, *args):
        """Replace the object list with objects loaded from a save file."""
        self.view.display_item('loading saved data.....')
        self.web_data_objects = self.data_handler.load_objects(
            args[self.PARAMETER_ONE])

    def save_data(self, *args):
        """Persist the current object list to the given path."""
        self.view.display_item('saving data to disk.....')
        self.data_handler.save_objects(self.web_data_objects,
                                       args[self.PARAMETER_ONE])

    def remove_data(self, *args):
        self.data_handler.remove_objects(args[self.PARAMETER_ONE])

    def get_request_data(self, *args):
        """Filter the last fetched page by a [tag, attr, value] triple."""
        try:
            data_options = self.check_second_level_args(args)[
                self.COMMAND_OPTION]
            data = self.web_request.get_request_data()
            req_data = BeautifulSoup(data, 'html.parser') \
                .findAll(data_options[self.TAG_TYPE],
                         attrs={data_options[self.CLASS_ID]:
                                data_options[self.CLASS_ID_NAME]})
            for data in req_data:
                self.filtered_data.append(data)
                self.view.display_item('filtering data.....')
        except TypeError:
            self.view.display_item(self.COMMAND_ERROR_MSG)
            return

    def get_recursive_request_data(self, *args):
        """Filter each recursively fetched page down to one matching tag."""
        try:
            data_options = self.check_second_level_args(args)[
                self.COMMAND_OPTION]
            for data in self.web_request.get_recursive_request_data():
                self.view.display_item('filtering recursive data.....')
                rec_data = BeautifulSoup(data, 'html.parser') \
                    .find(data_options[self.TAG_TYPE],
                          attrs={data_options[self.CLASS_ID]:
                                 data_options[self.CLASS_ID_NAME]})
                self.filtered_recursive_data.append(rec_data)
        except TypeError:
            self.view.display_item(self.COMMAND_ERROR_MSG)
            return

    def filter_urls(self, *args):
        """Extract hrefs from the filtered data and queue them for fetching."""
        try:
            data_options = self.check_second_level_args(args)[
                self.COMMAND_OPTION]
            self.view.display_item('filtering urls.....')
            for data in self.filtered_data:
                tag_depth = self.check_data_int(data_options[self.CLASS_ID])
                if tag_depth is not None:
                    # Numeric second field selects the nth matching tag.
                    url = data.find_all(data_options[self.TAG_TYPE])
                    self.web_request.add_recursive_url(url[tag_depth]['href'])
                else:
                    url = data.find(data_options[self.TAG_TYPE],
                                    attrs={data_options[self.CLASS_ID]:
                                           data_options[self.CLASS_ID_NAME]})
                    self.web_request.add_recursive_url(url['href'])
        except (TypeError, KeyError, IndexError):
            self.view.display_item(self.COMMAND_ERROR_MSG)
            return

    def set_data_keywords(self, *args):
        """Register [tag, class-id, name] triples for primary filtering."""
        kw_pairs = self.check_second_level_args(args)
        if kw_pairs is not None:
            for kw_pair in kw_pairs:
                keywords = [kw_pair[0], kw_pair[1], kw_pair[2]]
                self.view.display_item('adding tag, class pair: '
                                       + str(keywords))
                self.filtered_data_keywords.append(keywords)

    def set_recursive_data_keywords(self, *args):
        """Register [tag, class-id, name] triples for recursive filtering."""
        kw_pairs = self.check_second_level_args(args)
        if kw_pairs is not None:
            for kw_pair in kw_pairs:
                r_keywords = [kw_pair[0], kw_pair[1], kw_pair[2]]
                self.view.display_item('adding tag, class pair: '
                                       + str(r_keywords))
                self.filtered_recursive_data_keywords.append(r_keywords)

    def consolidate_data(self, *args):
        """Combine primary and recursive filtered data into web objects."""
        params = self.check_second_level_args(args)
        if params is not None and self.check_second_level_param_count(
                params, self.CONSOLIDATE_DATA_PARAM_COUNT):
            func_one = self.method_options(
                params[self.PARAMETER_ONE][self.PARAMETER_ONE],
                web_data_consolidate_options)
            func_two = self.method_options(
                params[self.PARAMETER_TWO][self.PARAMETER_ONE],
                web_data_consolidate_options)
            try:
                attr_one = func_one(
                    self.filtered_data, self.filtered_data_keywords,
                    params[self.PARAMETER_ONE][self.PARAMETER_TWO],
                    params[self.PARAMETER_ONE][self.PARAMETER_THREE])
                attr_two = func_two(
                    self.filtered_recursive_data,
                    self.filtered_recursive_data_keywords,
                    params[self.PARAMETER_TWO][self.PARAMETER_TWO],
                    params[self.PARAMETER_TWO][self.PARAMETER_THREE])
                self.create_web_data_object(attr_one, attr_two)
            except TypeError:
                self.view.display_item(self.CONSOLIDATE_ERROR_MSG)

    def filter_by_children(self, *args):
        """Build attribute dicts from <span>(name)/<div>(value) child pairs."""
        obj_attrs = []
        data = args[0]
        for d in data:
            names = OrderedSet()
            attrs = {}
            try:
                for dc in d.find_all('div'):
                    name = dc.find('span')
                    value = dc.find('div')
                    # NOTE(review): ``value`` is truthiness-tested while
                    # ``name`` is compared to None — preserved as written.
                    if value and name is not None:
                        if name.text not in names:
                            names.add(name.text)
                            attrs[name.text] = value.text
                obj_attrs.append(attrs)
            except AttributeError:
                self.view.display_item('Error filtering data '
                                       'from children.....')
        return self.sanitise_attributes(obj_attrs)

    def filter_by_keywords(self, *args):
        """Build attribute dicts by applying keyword triples to each tag."""
        data = args[self.PARAMETER_ONE]
        data_kw = args[self.PARAMETER_TWO]
        obj_attr = []
        for d in data:
            try:
                attrs = {}
                for kw_pair in data_kw:
                    tag_depth = self.check_data_int(
                        kw_pair[self.PARAMETER_TWO])
                    if tag_depth is not None:
                        # Numeric second field selects the nth matching tag.
                        value = d.find_all(kw_pair[self.PARAMETER_ONE])
                        value = value[tag_depth].string
                    else:
                        value = d.find(
                            kw_pair[self.PARAMETER_ONE],
                            {kw_pair[self.PARAMETER_TWO]:
                             kw_pair[self.PARAMETER_THREE]}).string
                    # Shared tail hoisted out of both branches (was duplicated).
                    value = self.check_data_type(value) if value else 'unknown'
                    attrs[kw_pair[self.PARAMETER_THREE]] = value
                obj_attr.append(attrs)
            except (TypeError, KeyError, IndexError):
                self.view.display_item(self.CONSOLIDATE_ERROR_MSG)
        return obj_attr

    def sanitise_attributes(self, obj_attrs):
        """Strip whitespace and echoed labels from raw attribute dicts."""
        sanitised_obj_attrs = []
        # BUG FIX: the original loop variable shadowed the builtin ``dict``.
        for raw_attrs in obj_attrs:
            attrs = {}
            for key, value in raw_attrs.items():
                # Pages sometimes repeat the label inside the value (and
                # vice versa); strip the echoes before normalising.
                value = value.replace(key, '')
                key = key.replace(value, '')
                sanitized_key = key.replace('\n', '').replace(' ', '').lower()
                sanitized_value = re.sub('[ ]+', ' ',
                                         value.replace('\n', '')).strip()
                attrs[sanitized_key] = self.check_data_type(sanitized_value)
            sanitised_obj_attrs.append(attrs)
        return sanitised_obj_attrs

    def create_web_data_object(self, attr_one, attr_two, obj_name='product'):
        """Merge paired attribute dicts and build one object per entry."""
        if len(attr_two) > 0:
            # CLARITY FIX: the original reused the parameter names as the
            # loop variables, shadowing them mid-iteration.
            for primary, secondary in zip(attr_one, attr_two):
                self.view.display_item('creating object.....')
                primary.update(secondary)
                self.web_data_objects.append(
                    self.web_object_factory.build_object(obj_name, primary))
        else:
            for primary in attr_one:
                self.view.display_item('creating object.....')
                self.web_data_objects.append(
                    self.web_object_factory.build_object(obj_name, primary))
class DataHandler(object):
    """Persists scraped web objects to disk and tracks known save locations.

    The list of known save paths is itself pickled to ``savelist.pickle``
    in the current working directory.
    """

    def __init__(self):
        self.web_object_factory = WebObjectFactory()
        self.view = ConsoleView()
        self.save_list = self.init_save_list()

    def init_save_list(self):
        """Load the saved-paths list from disk, creating the file if missing.

        :return: list of absolute paths previously registered.
        """
        s_list = []
        try:
            with open('savelist.pickle', 'rb') as input_file:
                s_list = pickle.load(input_file)
        except (EOFError, FileNotFoundError):
            self.view.display_item('No save list found, '
                                   'creating new save list.....')
            with open('savelist.pickle', 'wb') as output_file:
                pickle.dump(s_list, output_file)
        return s_list

    def save_save_list(self):
        """Write the current save list back to ``savelist.pickle``."""
        try:
            with open('savelist.pickle', 'wb') as output_file:
                pickle.dump(self.save_list, output_file)
        except (EOFError, FileNotFoundError):
            self.view.display_item('Error saving save list.....')

    def display_save_list(self):
        """Show every registered save location on the console view."""
        self.view.display_item('displaying file save locations.....')
        self.view.display_items(self.save_list)

    def add_save_list(self, save_location):
        """Register *save_location* (normalised to absolute) if not known."""
        save_location = abspath(save_location)
        if save_location not in self.save_list:
            self.save_list.append(save_location)
            self.save_save_list()

    def remove_save_list(self, save_location):
        """Remove *save_location* from the list and persist the change.

        :raises ValueError: if the path is not in the list (list.remove).
        """
        save_location = abspath(save_location)
        self.save_list.remove(save_location)
        self.save_save_list()

    def save_objects(self, web_objs, path):
        """Pickle the plain attribute dicts of *web_objs* to *path*.

        Only non-dunder instance attributes are persisted.
        """
        try:
            # Deeply nested data can exceed the default recursion limit
            # while pickling, hence the raise here.
            sys.setrecursionlimit(50000)
            objs = []
            for obj in web_objs:
                # BUG FIX: the original named this dict ``dict``, shadowing
                # the builtin; also filter instead of copy-then-delete.
                attrs = {key: value for key, value in obj.__dict__.items()
                         if not (key.startswith('__') and key.endswith('__'))}
                objs.append(attrs)
            # BUG FIX: the original left the output handle open; the context
            # manager closes it even if pickling raises.
            with open(path, 'wb') as output_file:
                pickle.dump(objs, output_file)
            self.add_save_list(path)
        except FileNotFoundError:
            self.view.display_item('Error saving file.....')

    def remove_objects(self, path):
        """Delete the save file at *path* and deregister it."""
        try:
            self.view.display_item('removing file ' + path + '.....')
            remove(path)
            self.remove_save_list(path)
        except FileNotFoundError:
            self.view.display_item('File ' + path + ' not found.....')

    def load_objects(self, path):
        """Unpickle attribute dicts from *path* and rebuild product objects.

        NOTE(security): ``pickle.load`` can execute arbitrary code from the
        file; only load files this application wrote itself.

        :return: list of rebuilt web objects (empty if the file is missing).
        """
        loaded_objs = []
        try:
            with open(path, 'rb') as input_file:
                web_objs = pickle.load(input_file)
            for obj in web_objs:
                loaded_objs.append(
                    self.web_object_factory.build_object('product', obj))
        except FileNotFoundError:
            self.view.display_item('File ' + path + ' not found.....')
        return loaded_objs
class WebRequest(OptionFilter, MessageHandler):
    """Fetches HTML for a primary url and a queue of recursive urls."""

    PRINT_DATA_MSG = 'No data to display.....'
    URL_NOT_VALID_MSG = 'please enter a valid url.....'
    CONNECTION_ERROR_MSG = 'data fetch error.....'

    def __init__(self):
        # BUG FIX: ``super(OptionFilter).__init__()`` initialises the unbound
        # super proxy, not the parent classes; ``super()`` is correct and
        # also respects the MRO across both bases.
        super().__init__()
        self.url = ''
        self.url_padding = ''       # prefix joined onto relative urls
        self.recursive_urls = []
        self.requests = requests    # requests module held as an attribute
        self.request_data = None    # body text of the last primary fetch
        self.requests_status_code = None
        self.recursive_request_data = []
        self.recursive_request_data_count = 0
        self.view = ConsoleView()

    def handle_command(self, args):
        """Dispatch *args* against the web-request command table."""
        return self.command(args, web_request_options)

    def print_data(self, *args):
        """Show the attribute selected by the command option, if any."""
        attr = self.method_options(args[self.COMMAND_OPTION],
                                   web_request_print_options)
        if attr is not None:
            if isinstance(attr, str):
                self.view.display_item(args[self.COMMAND_OPTION] + ': '
                                       + str(attr))
            else:
                self.view.display_items(attr)

    def set_url(self, *args):
        """Set the primary url after validating its scheme is http(s)."""
        match = urlparse(args[self.COMMAND_OPTION])
        if match[self.URL_SCHEME] in (self.URL_SCHEME_HTTP,
                                      self.URL_SCHEME_HTTPS):
            self.url = args[self.COMMAND_OPTION]
            self.view.display_item('setting url.....')
        else:
            self.view.display_item(self.URL_NOT_VALID_MSG)

    def set_url_padding(self, *args):
        """Set the prefix for recursive urls after validating its scheme."""
        match = urlparse(args[self.COMMAND_OPTION])
        if match[self.URL_SCHEME] in (self.URL_SCHEME_HTTP,
                                      self.URL_SCHEME_HTTPS):
            self.url_padding = args[self.COMMAND_OPTION]
            self.view.display_item('setting url padding.....')
        else:
            self.view.display_item(self.URL_NOT_VALID_MSG)

    def add_recursive_url(self, *args):
        """Queue ``url_padding + url`` for recursive fetching if valid."""
        full_url = self.url_padding + args[self.COMMAND_OPTION]
        if self.check_url(full_url):
            self.recursive_urls.append(full_url)
            self.view.display_item('adding url.....')
        else:
            self.view.display_item(self.URL_NOT_VALID_MSG)

    def fetch_html(self, *args):
        """GET the primary url, storing body text and status code."""
        if MessageHandler.check_none_condition(self, self.url,
                                               'url not set.....'):
            self.view.display_item('fetching html from ' + self.url + '.....')
            try:
                result = self.requests.get(self.url)
                self.requests_status_code = result.status_code
                self.request_data = result.text
            except requests.RequestException:
                self.view.display_item(self.CONNECTION_ERROR_MSG)

    def recursive_fetch(self, *args):
        """GET every queued recursive url, accumulating page bodies."""
        try:
            if len(self.recursive_urls) > 0:
                self.view.display_item('fetching recursive html.....')
                for url in self.recursive_urls:
                    self.view.display_item('fetching html from ' + url
                                           + '.....')
                    result = self.requests.get(url)
                    self.requests_status_code = result.status_code
                    self.recursive_request_data.append(result.text)
                    self.recursive_request_data_count += 1
            else:
                self.view.display_item('no recursive urls set.....')
        except requests.RequestException:
            self.view.display_item(self.CONNECTION_ERROR_MSG)

    def get_request_data(self):
        """Return the body of the last primary fetch (None before any)."""
        return self.request_data

    def get_recursive_request_data(self):
        """Return the accumulated recursive page bodies."""
        return self.recursive_request_data
class WebRequest(OptionFilter, MessageHandler):
    """Fetches HTML for a primary url and a queue of recursive urls."""

    PRINT_DATA_MSG = 'No data to display.....'
    URL_NOT_VALID_MSG = 'please enter a valid url.....'
    CONNECTION_ERROR_MSG = 'data fetch error.....'

    def __init__(self):
        # BUG FIX: ``super(OptionFilter).__init__()`` initialises the unbound
        # super proxy, not the parent classes; ``super()`` is correct and
        # also respects the MRO across both bases.
        super().__init__()
        self.url = ''
        self.url_padding = ''       # prefix joined onto relative urls
        self.recursive_urls = []
        self.requests = requests    # requests module held as an attribute
        self.request_data = None    # body text of the last primary fetch
        self.requests_status_code = None
        self.recursive_request_data = []
        self.recursive_request_data_count = 0
        self.view = ConsoleView()

    def handle_command(self, args):
        """Dispatch *args* against the web-request command table."""
        return self.command(args, web_request_options)

    def print_data(self, *args):
        """Show the attribute selected by the command option, if any."""
        attr = self.method_options(args[self.COMMAND_OPTION],
                                   web_request_print_options)
        if attr is not None:
            if isinstance(attr, str):
                self.view.display_item(args[self.COMMAND_OPTION] + ': '
                                       + str(attr))
            else:
                self.view.display_items(attr)

    def set_url(self, *args):
        """Set the primary url after validating its scheme is http(s)."""
        match = urlparse(args[self.COMMAND_OPTION])
        if match[self.URL_SCHEME] in (self.URL_SCHEME_HTTP,
                                      self.URL_SCHEME_HTTPS):
            self.url = args[self.COMMAND_OPTION]
            self.view.display_item('setting url.....')
        else:
            self.view.display_item(self.URL_NOT_VALID_MSG)

    def set_url_padding(self, *args):
        """Set the prefix for recursive urls after validating its scheme."""
        match = urlparse(args[self.COMMAND_OPTION])
        if match[self.URL_SCHEME] in (self.URL_SCHEME_HTTP,
                                      self.URL_SCHEME_HTTPS):
            self.url_padding = args[self.COMMAND_OPTION]
            self.view.display_item('setting url padding.....')
        else:
            self.view.display_item(self.URL_NOT_VALID_MSG)

    def add_recursive_url(self, *args):
        """Queue ``url_padding + url`` for recursive fetching if valid."""
        full_url = self.url_padding + args[self.COMMAND_OPTION]
        if self.check_url(full_url):
            self.recursive_urls.append(full_url)
            self.view.display_item('adding url.....')
        else:
            self.view.display_item(self.URL_NOT_VALID_MSG)

    def fetch_html(self, *args):
        """GET the primary url, storing body text and status code."""
        if MessageHandler.check_none_condition(self, self.url,
                                               'url not set.....'):
            self.view.display_item('fetching html from ' + self.url + '.....')
            try:
                result = self.requests.get(self.url)
                self.requests_status_code = result.status_code
                self.request_data = result.text
            except requests.RequestException:
                self.view.display_item(self.CONNECTION_ERROR_MSG)

    def recursive_fetch(self, *args):
        """GET every queued recursive url, accumulating page bodies."""
        try:
            if len(self.recursive_urls) > 0:
                self.view.display_item('fetching recursive html.....')
                for url in self.recursive_urls:
                    self.view.display_item('fetching html from ' + url
                                           + '.....')
                    result = self.requests.get(url)
                    self.requests_status_code = result.status_code
                    self.recursive_request_data.append(result.text)
                    self.recursive_request_data_count += 1
            else:
                self.view.display_item('no recursive urls set.....')
        except requests.RequestException:
            self.view.display_item(self.CONNECTION_ERROR_MSG)

    def get_request_data(self):
        """Return the body of the last primary fetch (None before any)."""
        return self.request_data

    def get_recursive_request_data(self):
        """Return the accumulated recursive page bodies."""
        return self.recursive_request_data