class DataHandler:
    """Fetch raw part listings from the pcpartpicker API and expose them as
    simple ``{"<brand> <model>": "$<price>"}`` mappings per part category.
    """

    # Retrieval order must match the key order of the dict built in
    # format_data(): cpu, gpu, psu, ram, mobo, hdd, case.
    _PART_KINDS = ("cpu", "video-card", "power-supply", "memory",
                   "motherboard", "internal-hard-drive", "case")

    def __init__(self):
        self.api = API()
        # One raw API result object per category, in _PART_KINDS order.
        self.raw_data = [self.api.retrieve(kind) for kind in self._PART_KINDS]

    def format_data(self):
        """Convert self.raw_data into json, get the data we need, and return
        a dict mapping the short category name ("cpu", "gpu", ...) to a list
        of {"<brand> <model>": "$<price>"} entries.
        """
        json_data = []
        for raw in self.raw_data:
            # BUG FIX: next() cannot be applied to dict.keys() directly
            # (dict_keys is iterable but not an iterator); wrap in iter()
            # to pull out the single top-level key.
            top_level_key = next(iter(raw.keys()))
            json_data.append(json.loads(raw.to_json())[top_level_key])

        formatted_data = {
            "cpu": [],
            "gpu": [],
            "psu": [],
            "ram": [],
            "mobo": [],
            "hdd": [],
            "case": []
        }
        # BUG FIX: the original did `key = next(formatted_data).key()`, which
        # raises TypeError (next() on a dict; dicts have no .key()) and would
        # have appended every category into one key anyway.  Pair each
        # category key with its parts list instead — dict insertion order
        # matches self.raw_data order.
        # NOTE(review): each top-level JSON value is assumed to be a list of
        # part dicts with 'brand'/'model'/'price' keys, where price is a
        # (currency, amount) pair — confirm against pcpartpicker's to_json.
        for key, parts in zip(formatted_data, json_data):
            for part in parts:
                formatted_data[key].append({
                    f"{part['brand']} {part['model']}":
                        f"${part['price'][1]}"
                })
        return formatted_data

    def get_part_data(self, part):
        """Retrieve the data on a particular part out of the mess that is
        the return of format_data.

        Parts must be cpu, gpu, psu, ram, mobo, hdd, or case.

        Raises:
            InvalidPart: if *part* is not one of the supported keys.
        """
        try:
            return self.format_data()[part]
        except KeyError:
            raise InvalidPart(
                "User entered a part that isn\'t supported/doesn\'t exist")
os.chdir(os.path.dirname(os.path.realpath(__file__))) print(os.path.abspath(".")) api = API() date = datetime.today().strftime('%Y-%m-%d') if mode != "gpu": if scrapeWeb: try: os.remove('ulCPU.json') except OSError: pass os.system('scrapy crawl ulCPU -o ulCPU.json') if priceSource == "pcpp": cpu_data = api.retrieve("cpu") pcppCPUs = {} print(len(cpu_data["cpu"]), "pcpartpicker cpus") print(cpu_data["cpu"][0]) with open(os.path.join(os.path.dirname(__file__), 'pcpp-cpus.csv'), 'w', newline='') as f: for cpu in cpu_data["cpu"]: f.write(str(cpu) + '\r\n') if not cpu.price.amount.is_zero() and not cpuBlacklists.search( cpu.model): model = cpu.model if "Core i" in model: model += " " if model in pcppCPUs: pcppCPUs[model]["price"] = min(
from pcpartpicker import API
import pandas as pd
import numpy as np

# Export all CPU listings for the Indian region to pcpp-cpu.csv,
# with missing fields recorded as NaN.
api = API(multithreading=False)
api.set_region('in')

df = api.retrieve('cpu')['cpu']

new = []
for obj in df:
    new.append({
        'brand': obj.brand if obj.brand else np.nan,
        'model': obj.model if obj.model else np.nan,
        'cores': obj.cores if obj.cores else np.nan,
        'base_clock': obj.base_clock.cycles if obj.base_clock else np.nan,
        # BUG FIX: the original read obj.base_clock.cycles here, so the
        # boost_clock column silently duplicated base_clock — and raised
        # AttributeError whenever boost_clock was set but base_clock was None.
        'boost_clock': obj.boost_clock.cycles if obj.boost_clock else np.nan,
        'tdp': obj.tdp if obj.tdp else np.nan,
        'integrated_graphics': np.nan if obj.integrated_graphics is None
                               else obj.integrated_graphics,
        'multithreading': np.nan if obj.multithreading is None
                          else obj.multithreading,
    })

new_df = pd.DataFrame(new)
new_df.to_csv('pcpp-cpu.csv', index=False)
'internal-hard-drive': 'internalharddrives', 'video-card': 'videocards', 'power-supply': 'powersupplies', 'monitor': 'monitors', 'case': 'casepcs', 'memory': 'memories' } database = PCDatabase(hostname="127.0.0.1", username="******", password="", database_name="reabpc") #fetch data from api to dictionary for component in dataEntity.keys(): data.update(json.loads(api.retrieve(component).to_json())) #clean data for component in data.keys(): data[component] = [ dict(filter(lambda y: y[0] in dataEntity[component], x.items())) for x in data[component] ] # overwrite max_ram key for i in range(len(data['motherboard'])): ((data['motherboard'])[i])['max_ram'] = (( data['motherboard'])[i])['max_ram']['total'] # overwrite capacity key for i in range(len(data['internal-hard-drive'])):
from pcpartpicker import API
import pandas as pd
import numpy as np

# Pull every video-card listing for the Indian region and flatten the
# part objects into a pandas DataFrame (missing fields become NaN).
api = API(multithreading=False)
api.set_region('in')

df = api.retrieve('video-card')['video-card']


def _card_row(card):
    """Flatten one video-card part object into a plain column dict."""
    return {
        'brand': card.brand if card.brand else np.nan,
        'model': card.model if card.model else np.nan,
        'chipset': card.chipset if card.chipset else np.nan,
        'vram': card.vram.total if card.vram else np.nan,
        'core_clock': card.core_clock.cycles if card.core_clock else np.nan,
        'boost_clock': card.boost_clock.cycles if card.boost_clock else np.nan,
        'interface': card.interface if card.interface else np.nan,
        'color': card.color if card.color else np.nan,
    }


new = [_card_row(card) for card in df]
new_df = pd.DataFrame(new)
class PCParts:
    """Caching wrapper around the pcpartpicker API.

    Part data is fetched once per part type, converted to a pandas
    DataFrame, and cached both in memory (``self.parts``) and on disk as
    pickle files under ``self.path``.
    """

    log.basicConfig(filename='debug.log', level=log.DEBUG)

    def __init__(self, debug=False, region='us'):
        self.pcpp_api = API(region)
        self.parts = {}  # part name -> DataFrame cache
        self.path = './Parts_Scraper/pickle/'
        log.debug("PCParts object init")
        #self.refresh_data()

    def set_region(self, new_region):
        '''
        Set region per pcpartspicker API requirements.
        Unsupported regions are silently ignored.
        '''
        if new_region in self.pcpp_api.supported_regions:
            self.pcpp_api.set_region(new_region)
        return

    def refresh_data(self):
        ''' Refresh temp storage cache '''
        self.__load_part_data(refresh=True)

    def get_JSON(self, part='cpu'):
        '''Return the cached DataFrame for *part* as JSON (orient='split').

        Raises:
            Exception: if *part* is not a part supported by the API.
        '''
        if part not in self.pcpp_api.supported_parts:
            # BUG FIX: the original had an unreachable `return` after this
            # raise; it has been removed.
            raise Exception("Argument 'part' is not a supported part.")
        if not self.parts:
            self.__load_part_data()
        js = self.parts[part].to_json(orient='split')
        return js

    def get_components(self):
        ''' Returns dictionary of parts and their components as an array '''
        if not self.parts:
            log.debug("Loading data for all parts...")
            # BUG FIX: the original called self.__load_part_data("all"),
            # which landed the string "all" in the boolean `refresh`
            # parameter (truthy), forcing a full re-fetch and defeating the
            # pickle cache whenever the in-memory cache was cold.
            self.__load_part_data()
        log.debug("Getting part components...")
        components = {}
        for part, df in self.parts.items():
            components[part] = [c for c in df.columns]
        return components

    def get_quantities(self):
        ''' List out all parts and their quantities '''
        if not self.parts:
            # BUG FIX: same stray "all" argument as in get_components().
            self.__load_part_data()
        log.debug("Getting part quantities...")
        num_parts = 0
        quantities = {}
        for part, df in self.parts.items():
            quantities[part] = len(df.index)
            num_parts += len(df.index)
        log.debug("Quantity of parts: " + str(num_parts))
        return quantities

    def __load_part_data(self, refresh=False):
        ''' Loads all part data.
        Refresh=true will refresh temp data storage cache '''
        load_type = "Refreshing" if refresh else "Loading"
        log.debug("{0} data...".format(load_type))
        for part in self.pcpp_api.supported_parts:
            self.__add_part_df_to_dict(part, refresh)
        log.debug("{0} complete.".format(load_type))
        return

    def __add_part_df_to_dict(self, part="all", refresh=False):
        ''' Convert part objects retrieved from PcPartsPicker API to
        dataframe and save as temp data storage(pickle).
        If refresh is specified, overwrite pickle storage files '''
        path = self.path + part + '.pkl'
        if os.path.exists(path) and not refresh:
            # Cache hit: reuse the pickled DataFrame instead of re-fetching.
            df = pd.read_pickle(path)
        else:
            parts_dict = defaultdict(list)
            retrieved_parts = self.pcpp_api.retrieve(part)[part]
            # Column-orient the part objects: one list per attribute.
            for p in retrieved_parts:
                for key, comp in vars(p).items():
                    parts_dict[key].append(comp)
            df = pd.DataFrame.from_dict(parts_dict)
            df.to_pickle(path)
        self.parts[part] = df
        return