Example #1
0
class DataHandler:
    """Fetch part listings from the API and reshape them into name -> price maps.

    ``raw_data`` holds one API payload per part category, in the order given
    by ``_PARTS``. ``format_data`` turns those payloads into a dictionary keyed
    by short category names (``cpu``, ``gpu``, ``psu``, ``ram``, ``mobo``,
    ``hdd``, ``case``).
    """

    # (short output key, API part name) pairs. The order here defines both the
    # order of ``self.raw_data`` and the key order of ``format_data()``'s result.
    _PARTS = (
        ("cpu", "cpu"),
        ("gpu", "video-card"),
        ("psu", "power-supply"),
        ("ram", "memory"),
        ("mobo", "motherboard"),
        ("hdd", "internal-hard-drive"),
        ("case", "case"),
    )

    def __init__(self):
        """Create the API client and retrieve every part category once."""
        self.api = API()
        self.raw_data = [
            self.api.retrieve(api_name) for _, api_name in self._PARTS
        ]

    def format_data(self):
        """Convert ``self.raw_data`` into per-category lists of name/price dicts.

        Returns:
            dict: maps each short category key to a list with one
            single-entry dict per part, ``{"<brand> <model>": "$<price>"}``.
            Categories without a matching payload map to an empty list.

        Raises:
            KeyError: if a part record lacks ``brand``, ``model`` or ``price``.
        """
        formatted_data = {short_name: [] for short_name, _ in self._PARTS}

        for (short_name, _), raw in zip(self._PARTS, self.raw_data):
            # Each payload is keyed by a single top-level part name. A keys
            # view is iterable but not an iterator, hence the explicit iter().
            top_level_key = next(iter(raw.keys()))
            parts = json.loads(raw.to_json())[top_level_key]

            # NOTE(review): price is indexed with [1] as in the original code;
            # presumably the serialized form is a (currency, amount) pair —
            # confirm against the API's JSON output.
            formatted_data[short_name] = [
                {f"{part['brand']} {part['model']}": f"${part['price'][1]}"}
                for part in parts
            ]

        return formatted_data

    def get_part_data(self, part):
        """Return the formatted list for one part category.

        Args:
            part: one of ``cpu``, ``gpu``, ``psu``, ``ram``, ``mobo``, ``hdd``
                or ``case``.

        Raises:
            InvalidPart: if ``part`` is not one of the supported categories.
        """
        # Format outside the try block so that a KeyError caused by malformed
        # part data is not misreported as an invalid part name.
        formatted_data = self.format_data()
        try:
            return formatted_data[part]
        except KeyError:
            raise InvalidPart(
                "User entered a part that isn't supported/doesn't exist")
# Work from the directory that contains this script (realpath resolves any
# symlinks first), then print the resulting working directory.
os.chdir(os.path.dirname(os.path.realpath(__file__)))
print(os.path.abspath("."))

# Shared API client and today's date formatted as YYYY-MM-DD.
# NOTE(review): assumes `datetime` is the class imported from the datetime
# module — confirm against the file's imports.
api = API()
date = datetime.today().strftime('%Y-%m-%d')

if mode != "gpu":
    if scrapeWeb:
        try:
            os.remove('ulCPU.json')
        except OSError:
            pass

        os.system('scrapy crawl ulCPU -o ulCPU.json')
    if priceSource == "pcpp":
        cpu_data = api.retrieve("cpu")
        pcppCPUs = {}
        print(len(cpu_data["cpu"]), "pcpartpicker cpus")
        print(cpu_data["cpu"][0])
        with open(os.path.join(os.path.dirname(__file__), 'pcpp-cpus.csv'),
                  'w',
                  newline='') as f:
            for cpu in cpu_data["cpu"]:
                f.write(str(cpu) + '\r\n')
                if not cpu.price.amount.is_zero() and not cpuBlacklists.search(
                        cpu.model):
                    model = cpu.model
                    if "Core i" in model:
                        model += " "
                    if model in pcppCPUs:
                        pcppCPUs[model]["price"] = min(
from pcpartpicker import API
import pandas as pd
import numpy as np

# Retrieve the CPU listing for region code 'in' and export selected fields
# to pcpp-cpu.csv.
api = API(multithreading=False)
api.set_region('in')
# Despite the name, `df` is the collection of part objects returned by the
# API, not a DataFrame; the DataFrame is built below as `new_df`.
df = api.retrieve('cpu')['cpu']

# Flatten each part object into a plain dict, substituting NaN for missing
# values so pandas treats them as missing data.
new = []
for obj in df:
    new.append({
        'brand': obj.brand if obj.brand else np.nan,
        'model': obj.model if obj.model else np.nan,
        'cores': obj.cores if obj.cores else np.nan,
        'base_clock': obj.base_clock.cycles if obj.base_clock else np.nan,
        # Fixed: this column previously read obj.base_clock.cycles, which
        # duplicated the base clock into the boost clock column.
        'boost_clock': obj.boost_clock.cycles if obj.boost_clock else np.nan,
        'tdp': obj.tdp if obj.tdp else np.nan,
        # The two flags below are compared against None explicitly so that a
        # legitimate falsy value (e.g. False) is kept rather than turned into NaN.
        'integrated_graphics': (np.nan if obj.integrated_graphics is None
                                else obj.integrated_graphics),
        'multithreading': (np.nan if obj.multithreading is None
                           else obj.multithreading),
    })

new_df = pd.DataFrame(new)
new_df.to_csv('pcpp-cpu.csv', index=False)
Example #4
0
    'internal-hard-drive': 'internalharddrives',
    'video-card': 'videocards',
    'power-supply': 'powersupplies',
    'monitor': 'monitors',
    'case': 'casepcs',
    'memory': 'memories'
}

# Connection handle for the local database.
database = PCDatabase(
    hostname="127.0.0.1",
    username="******",
    password="",
    database_name="reabpc",
)

# Pull every component category from the API and merge the decoded JSON
# payload into `data`.
for component in dataEntity:
    data.update(json.loads(api.retrieve(component).to_json()))

# Keep only the fields listed in dataEntity for each component's records.
for component in data:
    data[component] = [
        {
            field: value
            for field, value in record.items()
            if field in dataEntity[component]
        }
        for record in data[component]
    ]

# Replace each motherboard's max_ram entry with its 'total' value.
for board in data['motherboard']:
    board['max_ram'] = board['max_ram']['total']
# overwrite capacity key
for i in range(len(data['internal-hard-drive'])):
from pcpartpicker import API
import pandas as pd
import numpy as np

# Retrieve the video-card listing for region code 'in'.
api = API(multithreading=False)
api.set_region('in')
df = api.retrieve('video-card')['video-card']

# Flatten each card into a plain record; falsy or missing values become NaN.
new = []
for obj in df:
    new.append(dict(
        brand=obj.brand or np.nan,
        model=obj.model or np.nan,
        chipset=obj.chipset or np.nan,
        # Nested attributes are only dereferenced when the parent is truthy.
        vram=obj.vram.total if obj.vram else np.nan,
        core_clock=obj.core_clock.cycles if obj.core_clock else np.nan,
        boost_clock=obj.boost_clock.cycles if obj.boost_clock else np.nan,
        interface=obj.interface or np.nan,
        color=obj.color or np.nan,
    ))

new_df = pd.DataFrame(new)
Example #6
0
class PCParts:
    """Cache PCPartPicker part listings as pandas DataFrames, one per part type.

    Part data is loaded lazily on first access. Each part type is stored as a
    pickle file under ``self.path`` and reused on later runs unless a refresh
    is requested via ``refresh_data``.
    """

    # Runs once, when the class body is executed, and configures logging to
    # write DEBUG-level output to debug.log.
    log.basicConfig(filename='debug.log', level=log.DEBUG)

    def __init__(self, debug=False, region='us'):
        """Create the API client for ``region``.

        Args:
            debug: accepted for backward compatibility; currently unused.
            region: region code passed to the API constructor.
        """
        self.pcpp_api = API(region)
        self.parts = {}
        self.path = './Parts_Scraper/pickle/'
        log.debug("PCParts object init")

    def set_region(self, new_region):
        '''
        Set region per pcpartspicker API requirements.

        Unsupported regions are ignored (and logged) rather than raising.
        '''
        if new_region in self.pcpp_api.supported_regions:
            self.pcpp_api.set_region(new_region)
        else:
            log.warning("Ignoring unsupported region: %r", new_region)

    def refresh_data(self):
        '''
        Refresh temp storage cache
        '''
        self.__load_part_data(refresh=True)

    def get_JSON(self, part='cpu'):
        '''
        Return the data for one part type as a JSON string (orient='split').

        Raises ValueError (an Exception subclass) if `part` is not supported.
        '''
        if part not in self.pcpp_api.supported_parts:
            raise ValueError("Argument 'part' is not a supported part.")

        if not self.parts:
            self.__load_part_data()
        return self.parts[part].to_json(orient='split')

    def get_components(self):
        '''
        Returns dictionary of parts and their components as an array
        '''
        if not self.parts:
            log.debug("Loading data for all parts...")
            # Plain load: reuse cached pickles when present. Passing "all"
            # here used to land in the `refresh` parameter and, being truthy,
            # forced a full re-download on every first access.
            self.__load_part_data()

        log.debug("Getting part components...")

        return {part: list(df.columns) for part, df in self.parts.items()}

    def get_quantities(self):
        '''
        List out all parts and their quantities
        '''
        if not self.parts:
            # See get_components: load without forcing a refresh.
            self.__load_part_data()

        log.debug("Getting part quantities...")

        quantities = {part: len(df.index) for part, df in self.parts.items()}

        log.debug("Quantity of parts: %s", sum(quantities.values()))

        return quantities

    def __load_part_data(self, refresh=False):
        '''
        Loads all part data. Refresh=true will refresh temp data storage cache
        '''
        load_type = "Refreshing" if refresh else "Loading"

        log.debug("%s data...", load_type)

        for part in self.pcpp_api.supported_parts:
            self.__add_part_df_to_dict(part, refresh)

        log.debug("%s complete.", load_type)

    def __add_part_df_to_dict(self, part="all", refresh=False):
        '''
        Convert part objects retrieved from PcPartsPicker API to dataframe
            and save as temp data storage(pickle).
        If refresh is specified, overwrite pickle storage files
        '''
        path = self.path + part + '.pkl'
        if os.path.exists(path) and not refresh:
            # SECURITY NOTE: unpickling executes arbitrary code if the file
            # has been tampered with; only use a cache directory you trust.
            df = pd.read_pickle(path)
        else:
            # One column per instance attribute, one row per retrieved part.
            parts_dict = defaultdict(list)
            for p in self.pcpp_api.retrieve(part)[part]:
                for key, comp in vars(p).items():
                    parts_dict[key].append(comp)
            df = pd.DataFrame.from_dict(parts_dict)

            # Create the cache directory on first use so to_pickle does not
            # fail on a fresh checkout.
            cache_dir = os.path.dirname(path)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)
            df.to_pickle(path)

        self.parts[part] = df