Example #1
 def connect(self,
             reconnect=False,
             dbname='russtat',
             user='******',
             password=None,
             host='127.0.0.1',
             port='5432'):
     if self.con is not None and not reconnect:
         return True
     if password is None:
         password = input('>> Enter password: ')
     try:
         # reconstructed from the obfuscated source; psycopg2 is assumed as the PostgreSQL driver
         self.con = psycopg2.connect(dbname=dbname, user=user, password=password,
                                     host=host, port=port)
         self._connparams = (dbname, user, password, host, port)
         print(f'Connected to {self._connparams[0]} as {self._connparams[1]} '
               f'at {self._connparams[3]}:{self._connparams[4]}')
         return True
     except Exception as err:
         self.con = None
         print(err)
     return False
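
The connection call itself is obscured in the snippet above; below is a minimal standalone sketch of the same pattern, assuming psycopg2 as the PostgreSQL driver and placeholder credentials (both are assumptions, not taken from the source):

    import psycopg2

    # assumed connection parameters; the real values come from connect()'s arguments
    con = psycopg2.connect(dbname='russtat', user='postgres', password='secret',
                           host='127.0.0.1', port='5432')
    try:
        with con.cursor() as cur:
            cur.execute('SELECT version();')   # trivial round-trip to confirm the connection
            print(cur.fetchone()[0])
    finally:
        con.close()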
Example #2
 def disconnect(self):
     if self.con is None: return True
     try:
         self.con.commit()
         self.con.close()
         self.con = None
         report(
             f'Disconnected from {self._connparams[0] if self._connparams else "DB"}'
         )
         return True
     except Exception:
         pass
     return False
Example #3
 def add_data(self, data_json, disable_triggers=False, on_error=print):
     if not data_json:
         report('NONE data!', force=True)
         return None
     triggers_disabled = False
     if disable_triggers:
         triggers_disabled = self.disable_triggers(on_error=on_error)
     cur = self.exec(
         f"select * from public.add_data($${data_json}$$::text);",
         commit=True,
         on_error=on_error)
     if cur:
         res = cur.fetchone()
         if triggers_disabled:
             self.enable_triggers(on_error=on_error)
         return res
     else:
         # re-enable triggers before propagating the error
         if triggers_disabled:
             self.enable_triggers(on_error=on_error)
         raise Exception(self.dbmessages)
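
add_data wraps the payload in PostgreSQL dollar-quoting (`$$...$$`) and hands it to the server-side function public.add_data. A minimal sketch of calling such a function directly with psycopg2, using a bound parameter instead of building the literal by hand (credentials are placeholders, not from the source):

    import json
    import psycopg2

    con = psycopg2.connect(dbname='russtat', user='postgres', password='secret')  # assumed credentials
    payload = json.dumps({'id': 'demo', 'data': []}, ensure_ascii=False)

    with con, con.cursor() as cur:
        # %s binding lets the driver escape the text safely, even if it contains '$$'
        cur.execute('SELECT * FROM public.add_data(%s::text);', (payload,))
        print(cur.fetchone())
    con.close()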
Example #4
    def find_datasets(self, pattern, regex=False, case_sense=False, fullmatch=False):
        results = []
        if regex:
            regexp = re.compile(pattern) if case_sense else re.compile(pattern, re.I)

        if not self.datasets: self.update_dataset_list()

        for item in self.datasets:
            if 'title' not in item: continue
            title = item['title']
            if regex:
                if (fullmatch and regexp.fullmatch(title)) or (not fullmatch and regexp.search(title)):
                    results.append(item)
            else:
                title_cmp = title if case_sense else title.lower()
                pattern_cmp = pattern if case_sense else pattern.lower()
                if (fullmatch and title_cmp == pattern_cmp) or (not fullmatch and pattern_cmp in title_cmp):
                    results.append(item)
        report(f"Found {len(results)} matches for query '{pattern}'")
        return results
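
The matching rules are easiest to see in isolation; the sketch below replays the same predicate (regex vs. substring, optional case sensitivity, optional full match) on toy titles:

    import re

    titles = ['Население по регионам', 'население, всего', 'ВВП по регионам']

    def matches(title, pattern, regex=False, case_sense=False, fullmatch=False):
        if regex:
            rx = re.compile(pattern, 0 if case_sense else re.I)
            return bool(rx.fullmatch(title) if fullmatch else rx.search(title))
        t, p = (title, pattern) if case_sense else (title.lower(), pattern.lower())
        return t == p if fullmatch else p in t

    print([t for t in titles if matches(t, 'население')])                            # substring, case-insensitive
    print([t for t in titles if matches(t, r'.*регионам', regex=True, fullmatch=True)])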
Example #5
    def get_many(self, datasets=0, xmlfilenames='auto', overwrite=True, del_xml=True, 
              save2json='auto', loadfromjson='auto',
              processes='auto', wait=True, on_dataset=None, on_dataset_kwargs=None,
              on_results_ready=None, on_error=None, on_stopcheck=None):

        args = []

        if datasets is None and loadfromjson != 'auto' and loadfromjson is not None:

            if is_iterable(loadfromjson):
                for json_file in loadfromjson:
                    args.append((None, None, False, False, None, json_file, on_dataset, on_dataset_kwargs))
            else:
                args.append((None, None, False, False, None, loadfromjson, on_dataset, on_dataset_kwargs))

        else:

            if not self.datasets: self.update_dataset_list()

            if datasets is None:
                datasets = self.datasets
            elif isinstance(datasets, str):
                datasets = self.find_datasets(datasets)
            elif isinstance(datasets, int):
                datasets = [self.datasets[datasets]]
            elif is_iterable(datasets):
                if len(datasets) == 0:
                    report('Empty datasets parameter!', True)
                    return None          
                if isinstance(datasets[0], (int, str)):
                    datasets = [self[k] for k in datasets]            
            else:
                report('Bad type: datasets', True)
                return None

            if not datasets:
                report('No datasets matching your request.', True)
                return None

            # prepare args for worker function        
            for i, ds in enumerate(datasets):
                try:                
                    if is_iterable(xmlfilenames):
                        xmlfilename = xmlfilenames[i]
                    else:
                        xmlfilename = xmlfilenames                    
                    
                    if is_iterable(save2json):
                        save2json_ = save2json[i]            
                    else:
                        save2json_ = save2json

                    if is_iterable(loadfromjson):
                        loadfromjson_ = loadfromjson[i]            
                    else:
                        loadfromjson_ = loadfromjson
                    
                    args.append((ds, xmlfilename, overwrite, del_xml, save2json_, loadfromjson_, on_dataset, on_dataset_kwargs))
                    
                except Exception as err:
                    report(err, True)
                    return None

        if processes == 'auto': processes = None
        
        with Pool(processes=processes) as pool:
            try:
                result = pool.starmap_async(self.get_one, args, callback=on_results_ready, error_callback=on_error)
                pool.close()
                if wait: pool.join()
                return result
            except Exception as err:
                report(err, True)
                return None
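
get_many fans the per-dataset work out through multiprocessing.Pool.starmap_async with optional completion and error callbacks; a minimal standalone sketch of that pattern (the worker below is a stand-in, not the project's get_one):

    from multiprocessing import Pool

    def work(ds_id, factor):
        # stand-in for the per-dataset worker: just echo the arguments
        return ds_id * factor

    def on_ready(results):
        print('all done:', results)

    if __name__ == '__main__':
        args = [(1, 10), (2, 10), (3, 10)]
        with Pool(processes=2) as pool:
            res = pool.starmap_async(work, args, callback=on_ready, error_callback=print)
            pool.close()
            pool.join()          # the wait=True path: block until every worker finishes
        print(res.get())         # [10, 20, 30]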
Example #6
    def get_one(self, dataset, xmlfilename='auto', overwrite=True, del_xml=True, 
                save2json='auto', loadfromjson='auto', on_dataset=None, on_dataset_kwargs=None):        

        if loadfromjson is None or loadfromjson == 'auto':
            if isinstance(dataset, str):
                datasets = self.find_datasets(dataset)
                if not datasets:
                    report(f"No datasets match query '{dataset}'")
                    return None
                dataset = datasets[0]
            elif isinstance(dataset, int):
                try:
                    dataset = self[dataset]
                except Exception as err:
                    report(err)
                    return None
            elif not isinstance(dataset, dict):
                report(f"Bad data type for 'dataset': {type(dataset)}")
                return None
        
        if loadfromjson:
            if loadfromjson == 'auto':
                loadfromjson = os.path.join(self.root_folder,
                                            dataset.get('identifier', 'dataset') + '.json')
            
            ds = None
            try:
                with open(os.path.abspath(loadfromjson), 'r', encoding='utf-8') as infile:
                    ds = json.load(infile, object_hook=Russtat.json_hook)             
            except Exception as err:
                report(f"{err}   Importing from XML...")
                return self.get_one(dataset, xmlfilename, overwrite, del_xml, save2json, None, on_dataset, on_dataset_kwargs)
            else:
                report(f'Loaded from JSON ({loadfromjson})')
                if on_dataset: 
                    if on_dataset_kwargs:
                        on_dataset(ds, **on_dataset_kwargs)
                    else:
                        on_dataset(ds)
                return ds

        if 'link' not in dataset:
            report('Dataset has no "link" object!')
            #return None

        if xmlfilename == 'auto':
            xmlfilename = dataset.get('identifier', 'dataset') + '.xml'

        outputfile = os.path.abspath(os.path.join(self.root_folder, xmlfilename))
        if not os.path.exists(outputfile) or overwrite:       
            try:
                os.remove(outputfile)
                report(f'Deleted existing XML ({outputfile})')
            except Exception as err:
                report(err)                
            try:
                res = requests.get(dataset['link'], timeout=self.connection_timeout)
                if not res: 
                    report(f"Could not retrieve dataset from {dataset['link']}")
                    #return None

                with open(outputfile, 'wb') as outfile:
                    outfile.write(res.content)
                report(f"Downloaded XML from {dataset['link']} to {outputfile}")

            except Exception as err:
                report(err)
                #return None

        ds = {'prepared': dt.now(), 'id': dataset['identifier'], 'agency_id': '', 'codes': {}, 
              'full_name': dataset['title'], 'unit': '', 'periodicity': {'value': '', 'releases': '', 'next': dt.fromisoformat('1900-01-01')}, 
              'data_range': (-1, -1), 'updated': dt.fromisoformat('1900-01-01'), 'methodology': '', 'agency_name': '', 'agency_dept': '', 
              'classifier': {'id': '', 'path': ''}, 'prepared_by': {'name': '', 'contacts': ''}, 'data': []}

        try:
            tree = ET.parse(outputfile, ET.XMLParser(encoding='utf-8'))
            ds_rootnode = tree.getroot()

            # Header
            node_hdr = ds_rootnode.find('message:Header', XML_NS)        
            ds['prepared'] = dt.fromisoformat(self._get_text(node_hdr, 'message:Prepared', '1900-01-01')) - timedelta(hours=3)
            ds['id'] = self._get_text(node_hdr, 'message:DataSetID')
            ds['agency_id'] = self._get_text(node_hdr, 'message:DataSetAgency')

            # Codes
            ds['codes'] = self._get_codes(ds_rootnode)

            # Description
            node_desc = ds_rootnode.find('message:Description', XML_NS).find('message:Indicator', XML_NS)
            ds['full_name'] = ' '.join(self._get_attr(node_desc, 'name').split())
            ds['unit'] = self._get_attr(node_desc, 'value', ['message:Units', 'message:Unit'])
            ds['periodicity']['value'] = self._get_attr(node_desc, 'value', ['message:Periodicities', 'message:Periodicity'])
            ds['periodicity']['releases'] = self._get_attr(node_desc, 'releases', ['message:Periodicities', 'message:Periodicity'])
            ds['periodicity']['next'] = dt.strptime(self._get_attr(node_desc, 'next-release', ['message:Periodicities', 'message:Periodicity'], '01.01.1900'), '%d.%m.%Y') - timedelta(hours=3)
            ds['data_range'] = tuple(int(self._get_attr(node_desc, x, 'message:DataRange', '0')) for x in ('start', 'end'))
            ds['updated'] = dt.fromisoformat(self._get_attr(node_desc, 'value', 'message:LastUpdate', '1900-01-01')) - timedelta(hours=3)
            ds['methodology'] = ' '.join(self._get_attr(node_desc, 'value', 'message:Methodology').split())
            ds['agency_name'] = self._get_attr(node_desc, 'value', 'message:Organization')
            ds['agency_dept'] = self._get_attr(node_desc, 'value', 'message:Department')
            ds['classifier']['id'] = self._get_attr(node_desc, 'id', ['message:Allocations', 'message:Allocation'])
            ds['classifier']['path'] = self._get_text(node_desc, ['message:Allocations', 'message:Allocation', 'message:Name'])
            ds['prepared_by']['name'] = self._get_text(node_desc, ['message:Responsible', 'message:Name'])
            ds['prepared_by']['contacts'] = self._get_text(node_desc, ['message:Responsible', 'message:Contacts'])
            ds['data'] = self._get_data(ds_rootnode, ds['codes'])

            if save2json:
                if save2json == 'auto':
                    save2json = dataset.get('identifier', 'dataset') + '.json'
                try:
                    json_file = os.path.abspath(os.path.join(self.root_folder, save2json))
                    with open(json_file, 'w', encoding='utf-8') as outfile:
                        json.dump(ds, outfile, ensure_ascii=False, indent=4, default=str)
                    report(f'Saved to JSON ({json_file})')
                except Exception as err:
                    report(err)

            if del_xml:
                try:
                    os.remove(outputfile)
                    report(f'Deleted XML ({outputfile})')
                except Exception as err:
                    report(err)

            if on_dataset: 
                if on_dataset_kwargs:
                    on_dataset(ds, **on_dataset_kwargs)
                else:
                    on_dataset(ds)

        except Exception as err:
            report(err)
            
            # try to process empty dataset
            if on_dataset: 
                try:
                    if on_dataset_kwargs:
                        on_dataset(ds, **on_dataset_kwargs)
                    else:
                        on_dataset(ds)
                except Exception:
                    pass

            if del_xml:
                try:
                    os.remove(outputfile)
                    report(f'Deleted XML ({outputfile})')
                except Exception as err2:
                    report(err2)
                    
            return ds

        return ds
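
A hedged usage sketch (the Russtat constructor arguments are not shown in this excerpt, so the no-argument construction below is an assumption):

    rs = Russtat()                        # assumption: default construction
    rs.update_dataset_list()              # populate rs.datasets (cached JSON or fresh XML)
    ds = rs.get_one(0, save2json='auto')  # parse the first dataset, cache the parsed result as JSON
    if ds:
        print(ds['full_name'], '-', len(ds['data']), 'observations')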
Example #7
    def _get_data(self, ds_rootnode, codes, max_row=-1):
        n = 0
        dataset = ds_rootnode.find('message:DataSet', XML_NS)
        if dataset is None:
            return []
        data = []
        for item in dataset.iterfind('generic:Series', XML_NS):
            
            try:

                # period and unit
                per, ei = ('', '')
                try:
                    for attr in item.find('generic:Attributes', XML_NS).iterfind('generic:Value', XML_NS):
                        concept = self._get_attr(attr, 'concept')
                        val = self._get_attr(attr, 'value')
                        if concept == 'EI':
                            ei = val
                        elif concept == 'PERIOD':
                            per = val
                except Exception:
                    per, ei = ('', '')

                # year
                try:
                    tim = int(self._get_text(item, ['generic:Obs', 'generic:Time'], '0'))
                except Exception:
                    tim = 0

                # value
                try:
                    val = float(self._get_attr(item, 'value', ['generic:Obs', 'generic:ObsValue'], '0.0').replace(',', '.').replace(' ', ''))
                except Exception:
                    val = 0.0

                # classifier and class
                try:
                    for key_item in item.find('generic:SeriesKey', XML_NS).iterfind('generic:Value', XML_NS):                
                    
                        key_concept = self._get_attr(key_item, 'concept')
                        key_key = self._get_attr(key_item, 'value')
                        classifier, cl = ('', '')
                        
                        for code in codes:
                            if code == key_concept:
                                classifier = codes[code]['name']
                                for cval in codes[code]['values']:
                                    if cval[0] == key_key:
                                        cl = cval[1]
                                        break
                                break
                        
                        data.append((classifier, cl, ei, per, tim, val))
                        n += 1
                        if max_row > 0 and n > max_row: break
                        
                except Exception:
                    data.append(('', '', ei, per, tim, val))
                    n += 1
                    if max_row > 0 and n > max_row: break

            except Exception as err:
                report(err)
                break

        return data
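
Each record the helper appends is a flat 6-tuple; reading the two append() calls, the field order is classifier name, classifier value, unit (the EI attribute), period, year, and numeric value. A placeholder illustration:

    # field order produced by _get_data, shown with placeholder values
    row = ('<classifier name>', '<classifier value>', '<unit / EI>', '<period>', 2020, 0.0)
    classifier, cl, ei, per, tim, val = row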
Example #8
    def update_dataset_list(self, xmlfilename='list.xml', xml_only=True, overwrite=False, del_xml=True, 
                            save2json='list_json.json', loadfromjson='list_json.json'):
        self.datasets = []

        if loadfromjson:
            try:
                json_file = os.path.abspath(os.path.join(self.root_folder, loadfromjson))
                with open(json_file, 'r', encoding='utf-8') as infile:
                    self.datasets = json.load(infile)
                report(f'Loaded from JSON ({json_file}): {len(self.datasets)} datasets')
                return
            except Exception as err:
                report(f"{err}   Importing from XML...")
                # fall back to a fresh import from XML; pass del_xml through and stop here
                self.update_dataset_list(xmlfilename, xml_only, overwrite, del_xml, save2json, None)
                return
            
        outputfile = os.path.abspath(os.path.join(self.root_folder, xmlfilename))
        if not os.path.exists(outputfile) or overwrite:       
            try:
                os.remove(outputfile)
                report(f'Deleted existing XML ({outputfile})')
            except Exception as err:
                report(err)                
            try:
                res = requests.get(URL_EMISS_LIST, timeout=self.connection_timeout)
                if not res: 
                    report(f'Could not retrieve dataset list from {URL_EMISS_LIST}')
                    return

                with open(outputfile, 'wb') as outfile:
                    outfile.write(res.content)
                report(f'Downloaded XML from {URL_EMISS_LIST} to {outputfile}')

            except Exception as err:
                report(err)
                return
        
        tree = ET.parse(outputfile, ET.XMLParser(encoding='utf-8'))
        root_el = tree.getroot()

        for item in root_el.find('meta').iter('item'):
            if xml_only and item.find('format').text != 'xml':
                continue
            self.datasets.append({child.tag: (child.text or '').strip('"').strip() for child in item})
        report(f'Loaded from XML ({outputfile}): {len(self.datasets)} datasets')

        if del_xml:
            try:
                os.remove(outputfile)
                report(f'Deleted XML ({outputfile})')
            except Exception as err:
                report(err)

        if save2json:
            try:
                json_file = os.path.abspath(os.path.join(self.root_folder, save2json))
                with open(json_file, 'w', encoding='utf-8') as outfile:
                    json.dump(self.datasets, outfile, ensure_ascii=False, indent=4)
                report(f'Saved to JSON ({json_file})')
            except Exception as err:
                report(err)
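
A hedged usage sketch of the two refresh paths (again assuming default construction of Russtat, which is not shown in this excerpt):

    rs = Russtat()                                    # assumption: default construction
    rs.update_dataset_list()                          # fast path: read the cached list_json.json
    rs.update_dataset_list(loadfromjson=None,         # slow path: re-download and re-parse list.xml,
                           overwrite=True)            # then rewrite the JSON cache
    print(len(rs.datasets), 'datasets available')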