def lc_get_close_matches(lbl, possibilities, num_matches=3, cutoff=0.6):
    '''Return list of closest matches to lbl from possibilities (case-insensitive).'''
    if USING_PYTHON2:
        lc_lbl = str.lower(unicode(lbl))
        lc_possibilities = [str.lower(unicode(p)) for p in possibilities]
    else:
        lc_lbl = str.lower(lbl)
        lc_possibilities = [str.lower(p) for p in possibilities]
    lc_matches = get_close_matches(lc_lbl, lc_possibilities, num_matches, cutoff)
    return [possibilities[lc_possibilities.index(m)] for m in lc_matches]
def lc_get_close_matches(lbl, possibilities, num_matches=3, cutoff=0.6):
    '''Return list of closest matches to lbl from possibilities (case-insensitive).'''

    # Strip any non-strings so str.lower() doesn't crash.
    # (basestring only exists on Python 2, so pick the matching string base type.)
    string_types = basestring if USING_PYTHON2 else str
    possibilities = [p for p in possibilities if isinstance(p, string_types)]

    if USING_PYTHON2:
        lc_lbl = str.lower(unicode(lbl))
        lc_possibilities = [str.lower(unicode(p)) for p in possibilities]
    else:
        lc_lbl = str.lower(lbl)
        lc_possibilities = [str.lower(p) for p in possibilities]
    lc_matches = get_close_matches(lc_lbl, lc_possibilities, num_matches, cutoff)
    return [possibilities[lc_possibilities.index(m)] for m in lc_matches]
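For reference, the same case-insensitive matching idea as a minimal, Python-3-only sketch; the function name and the sample candidate list below are illustrative and not from the original module:

    from difflib import get_close_matches

    def lc_close_matches(label, possibilities, n=3, cutoff=0.6):
        # Compare in lower case, but return the original (un-lowered) candidates.
        lc_possibilities = [p.lower() for p in possibilities]
        return [possibilities[lc_possibilities.index(m)]
                for m in get_close_matches(label.lower(), lc_possibilities, n, cutoff)]

    print(lc_close_matches('value', ['RefDes', 'Value', 'Footprint'], n=1))  # -> ['Value']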
def _sign(self, args):
    params = list(zip(list(args.keys()), list(args.values())))
    params.sort(key=lambda k: str.lower(k[0]))
    hash_str = "&".join(
        ["=".join(
            [str.lower(r[0]),
             str.lower(urllib.parse.quote_plus(str(r[1]))).replace("+", "%20")]
        ) for r in params]
    )
    signature = base64.encodestring(
        hmac.new(self.api_secret.encode('utf-8'),
                 hash_str.encode('utf-8'),
                 hashlib.sha1).digest()).strip()
    self.signature = signature
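The method above follows a common request-signing pattern (it resembles CloudStack-style API signing): sort the parameters case-insensitively, lower-case and URL-encode them, HMAC-SHA1 the joined "k=v&k=v" string with the API secret, then Base64-encode the digest. A standalone sketch of the same steps, assuming made-up parameter names and secret, and using base64.b64encode in place of the deprecated encodestring:

    import base64
    import hashlib
    import hmac
    import urllib.parse

    def sign_request(args, api_secret):
        # Sort params case-insensitively, lower-case keys and quoted values,
        # then HMAC-SHA1 the joined string and Base64-encode the digest.
        params = sorted(args.items(), key=lambda kv: kv[0].lower())
        hash_str = "&".join(
            "=".join([k.lower(),
                      urllib.parse.quote_plus(str(v)).lower().replace("+", "%20")])
            for k, v in params
        )
        digest = hmac.new(api_secret.encode("utf-8"),
                          hash_str.encode("utf-8"), hashlib.sha1).digest()
        return base64.b64encode(digest).strip()

    # Values here are illustrative only.
    print(sign_request({'command': 'listVirtualMachines', 'apiKey': 'ABC'}, 'not-a-real-secret'))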
def __init__(self, logfile, instance, namespace, location, remote_conn_details):
    super(ConnectMUMPS, self).__init__()

    self.type = str.lower(instance)
    self.namespace = str.upper(namespace)
    self.prompt = self.namespace + '>'

    # Create a new SSH client object
    client = paramiko.SSHClient()

    # Set SSH key parameters to auto accept unknown hosts
    client.load_system_host_keys()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    # Connect to the host
    client.connect(hostname=remote_conn_details.remote_address,
                   port=remote_conn_details.remote_port,
                   username=remote_conn_details.username,
                   password=remote_conn_details.password)

    # Create a client interaction class which will interact with the host
    from paramikoe import SSHClientInteraction
    interact = SSHClientInteraction(client, timeout=10, display=False)
    self.connection = interact
    self.connection.logfile_read = open(logfile, 'w')

    # Keep a reference to the client; apparently there is a destructor which
    # disconnects (probably sends a FIN packet) when the client is gone.
    self.client = client
def __init__(self, config=None, config_json=None, config_fp=None, config_dir=None,
             areas=None, peril_areas=None, peril_areas_index=None,
             peril_areas_index_props=None,
             loc_to_global_areas_boundary_min_distance=0,
             vulnerabilities=None, loc_id_col='locnumber'):
    super(self.__class__, self).__init__(
        config=config,
        config_json=config_json,
        config_fp=config_fp,
        config_dir=config_dir,
    )

    loc_config = self.config.get('exposure') or self.config.get('locations')
    self.loc_id_col = str.lower(str(loc_config.get('id_col') or loc_id_col))

    self.peril_lookup = OasisPerilLookup(
        config=self.config,
        config_dir=self.config_dir,
        areas=areas,
        peril_areas=peril_areas,
        peril_areas_index=peril_areas_index,
        peril_areas_index_props=peril_areas_index_props,
        loc_to_global_areas_boundary_min_distance=loc_to_global_areas_boundary_min_distance,
        loc_id_col=self.loc_id_col)

    self.peril_area_id_key = str(
        str(self.config['peril'].get('peril_area_id_col') or '') or 'peril_area_id').lower()

    self.vulnerability_id_key = str(
        str(self.config['vulnerability'].get('vulnerability_id_col')) or 'vulnerability_id').lower()

    self.vulnerability_lookup = OasisVulnerabilityLookup(
        config=self.config,
        config_dir=self.config_dir,
        vulnerabilities=vulnerabilities,
        loc_id_col=self.loc_id_col)
def __init__(self, config=None, config_json=None, config_fp=None, config_dir=None,
             vulnerabilities=None, loc_id_col='locnumber'):
    super(self.__class__, self).__init__(config=config,
                                         config_json=config_json,
                                         config_fp=config_fp,
                                         config_dir=config_dir)

    if vulnerabilities or self.config.get('vulnerability'):
        self.col_dtypes, self.key_cols, self.vuln_id_col, self.vulnerabilities = \
            self.get_vulnerabilities(vulnerabilities=vulnerabilities)

    if self.config.get('exposure') or self.config.get('locations'):
        self.loc_id_col = str.lower(str(self.config['exposure'].get('id_col') or loc_id_col))
def guess_collections(PossibleCollections, PossibleGenera, Genus):
    """By comparing user-provided genus and iDigBio-scraped genera, program guesses correct collection"""
    BestGuess = None
    CollectionsChoice = None
    # Take a guess at the correct collection based on which record matches the user-provided genus.
    for i in range(len(PossibleGenera)):
        if PossibleGenera[i] == str.lower(Genus[0]):
            BestGuess = i
            print()
            print('Best guess of correct collection: ' + PossibleCollections[i])
            # Plain input(); wrapping it in eval() would fail on a bare y/n answer.
            GoodGuess = input("Is this the correct collection? [y/n]")
            if GoodGuess == 'y':
                CollectionsChoice = PossibleCollections[BestGuess]
    if BestGuess is None:
        print("No match found. Can't guess. Please choose a collection.")
        CollectionsChoice = user_choose_collection(PossibleCollections)
    elif CollectionsChoice is None:
        # A guess was found but rejected by the user; fall back to a manual choice
        # so CollectionsChoice is always bound before returning.
        CollectionsChoice = user_choose_collection(PossibleCollections)
    return CollectionsChoice
def generate_peril_areas_rtree_file_index(
    self,
    keys_data_fp,
    areas_rtree_index_fp,
    lookup_config_fp=None,
    lookup_config=None,
):
    # Convert paths to absolute
    keys_data_fp = as_path(keys_data_fp, 'Lookup Data directory', is_dir=True, preexists=True)
    areas_rtree_index_fp = as_path(areas_rtree_index_fp, 'Index output file path', preexists=False)
    lookup_config_fp = as_path(lookup_config_fp, 'Built-in lookup config file path', preexists=True)

    if not (lookup_config or lookup_config_fp):
        raise OasisException('Either a built-in lookup config. or config. file path is required')

    config = get_json(src_fp=lookup_config_fp) if lookup_config_fp else lookup_config
    config_dir = os.path.dirname(lookup_config_fp) if lookup_config_fp else keys_data_fp

    peril_config = config.get('peril')
    if not peril_config:
        raise OasisException(
            'The lookup config must contain a peril-related subdictionary with a key named '
            '`peril` defining area-peril-related model information'
        )

    areas_fp = peril_config.get('file_path')
    if not areas_fp:
        raise OasisException(
            'The lookup peril config must define the path of a peril areas '
            '(or area peril) file with the key name `file_path`'
        )

    if areas_fp.startswith('%%KEYS_DATA_PATH%%'):
        areas_fp = areas_fp.replace('%%KEYS_DATA_PATH%%', keys_data_fp)

    if not os.path.isabs(areas_fp):
        areas_fp = os.path.join(config_dir, areas_fp)

    areas_fp = as_path(areas_fp, 'areas_fp')

    src_type = str.lower(str(peril_config.get('file_type')) or '') or 'csv'

    peril_id_col = str.lower(str(peril_config.get('peril_id_col')) or '') or 'peril_id'

    coverage_config = config.get('coverage')
    if not coverage_config:
        raise OasisException(
            'The lookup config must contain a coverage-related subdictionary with a key named '
            '`coverage` defining coverage related model information'
        )

    coverage_type_col = str.lower(str(coverage_config.get('coverage_type_col')) or '') or 'coverage_type'
    peril_area_id_col = str.lower(str(peril_config.get('peril_area_id_col')) or '') or 'area_peril_id'

    area_poly_coords_cols = peril_config.get('area_poly_coords_cols')
    if not area_poly_coords_cols:
        raise OasisException(
            'The lookup peril config must define the column names of '
            'the coordinates used to define areas in the peril areas '
            '(area peril) file using the key `area_poly_coords_cols`'
        )

    non_na_cols = (
        tuple(col.lower() for col in peril_config['non_na_cols'])
        if peril_config.get('non_na_cols')
        else tuple(col.lower() for col in [peril_area_id_col] + area_poly_coords_cols.values())
    )

    col_dtypes = peril_config.get('col_dtypes') or {peril_area_id_col: int}
    sort_cols = peril_config.get('sort_cols') or peril_area_id_col
    area_poly_coords_seq_start_idx = peril_config.get('area_poly_coords_seq_start_idx') or 1
    area_reg_poly_radius = peril_config.get('area_reg_poly_radius') or 0.00166

    index_props = peril_config.get('rtree_index')
    index_props.pop('filename')

    return PerilAreasIndex.create_from_peril_areas_file(
        src_fp=areas_fp,
        src_type=src_type,
        peril_id_col=peril_id_col,
        coverage_type_col=coverage_type_col,
        peril_area_id_col=peril_area_id_col,
        non_na_cols=non_na_cols,
        col_dtypes=col_dtypes,
        sort_cols=sort_cols,
        area_poly_coords_cols=area_poly_coords_cols,
        area_poly_coords_seq_start_idx=area_poly_coords_seq_start_idx,
        area_reg_poly_radius=area_reg_poly_radius,
        index_fp=areas_rtree_index_fp,
        index_props=index_props
    )
def __init__(self, areas=None, config=None, config_json=None, config_fp=None, config_dir=None,
             loc_to_global_areas_boundary_min_distance=0, peril_areas=None,
             peril_areas_index=None, peril_areas_index_fp=None, peril_areas_index_props=None,
             loc_id_col='locnumber'):
    super(self.__class__, self).__init__(config=config, config_json=config_json,
                                         config_fp=config_fp, config_dir=config_dir)

    peril_config = self.config.get('peril') or {}

    if areas or peril_areas or peril_config:
        if peril_areas_index:
            self.peril_areas_index = peril_areas_index
            self.peril_areas_index_props = self.peril_areas_index_props.properties.as_dict()
        elif (areas or peril_areas):
            self.index_props = (peril_areas_index_props or
                                peril_config.get('rtree_index') or
                                DEFAULT_RTREE_INDEX_PROPS)
            self.peril_areas_index = PerilAreasIndex(areas=areas, peril_areas=peril_areas,
                                                     properties=self.index_props)
        else:
            areas_rtree_index_config = peril_config.get('rtree_index') or {}
            index_fp = peril_areas_index_fp or areas_rtree_index_config.get('filename')

            if not os.path.isabs(index_fp):
                index_fp = os.path.join(self.config_dir, index_fp)
                index_fp = as_path(index_fp, 'index_fp', preexists=False)

            if index_fp:
                idx_ext = areas_rtree_index_config.get('idx_extension') or 'idx'
                dat_ext = areas_rtree_index_config.get('dat_extension') or 'dat'
                if not (os.path.exists('{}.{}'.format(index_fp, idx_ext)) or
                        os.path.exists('{}.{}'.format(index_fp, dat_ext))):
                    raise OasisException(
                        'No Rtree file index {}.{{{}, {}}} found'.format(index_fp, idx_ext, dat_ext))
                self.peril_areas_index = PerilAreasIndex(fp=index_fp)
                self.peril_areas_index_props = self.peril_areas_index.properties.as_dict()

        self.peril_areas_boundary = box(*self.peril_areas_index.bounds, ccw=False)

        _centroid = self.peril_areas_boundary.centroid
        self.peril_areas_centre = _centroid.x, _centroid.y

        self.loc_to_global_areas_boundary_min_distance = (
            loc_to_global_areas_boundary_min_distance or
            self.config['peril'].get('loc_to_global_areas_boundary_min_distance') or 0)

    if self.config.get('exposure') or self.config.get('locations'):
        self.loc_id_col = str.lower(str(self.config['exposure'].get('id_col') or loc_id_col))
        self.loc_coords_x_col = str.lower(str(self.config['exposure'].get('coords_x_col')) or 'lon')
        self.loc_coords_y_col = str.lower(str(self.config['exposure'].get('coords_y_col')) or 'lat')
        self.loc_coords_x_bounds = tuple(self.config['exposure'].get('coords_x_bounds') or ()) or (-180, 180)
        self.loc_coords_y_bounds = tuple(self.config['exposure'].get('coords_y_bounds') or ()) or (-90, 90)
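The constructors above load or build an R-tree spatial index of peril areas and then look locations up against it. The underlying idea can be illustrated directly with the generic rtree package (this is a standalone sketch, not the OasisLMF wrapper; the bounding boxes and query point are made up):

    from rtree import index

    # Build an in-memory R-tree of two illustrative area bounding boxes.
    idx = index.Index()
    idx.insert(1, (10.0, 47.0, 10.5, 47.5))   # id=1, (minx, miny, maxx, maxy)
    idx.insert(2, (10.5, 47.0, 11.0, 47.5))   # id=2

    # Point lookup: which area(s) contain lon=10.2, lat=47.3?
    print(list(idx.intersection((10.2, 47.3, 10.2, 47.3))))  # -> [1]

    # Nearest-area lookup for a point outside all areas.
    print(list(idx.nearest((9.0, 47.3, 9.0, 47.3), 1)))      # -> [1]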
def get_rs_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the RS Components HTML page for a part number and return the URL and parse tree.'''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'http://it.rs-online.com/web/c/?searchTerm=' + urlquote(pn + ' ' + extra_search_terms, safe='')
    elif url[0] == '/':
        url = 'http://it.rs-online.com' + url
    elif url.startswith('..'):
        url = 'http://it.rs-online.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            req = FakeBrowser(url)
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
            pass
    else:
        # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='specTableContainer') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('div', class_='srtnPageContainer') is not None:
        logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find_all('tr', class_='resultRow')

            # Extract the product links for the part numbers from the table.
            product_links = []
            for p in products:
                try:
                    product_links.append(p.find('a', class_='primarySearchLink')['href'])
                    # Up to now get the first url found in the list, i.e. do not choose the url
                    # based on the stock type (e.g. single unit, reel etc.)
                    return get_rs_part_html_tree(dist, pn, extra_search_terms,
                                                 url=product_links[0], descend=descend-1)
                except AttributeError:
                    continue
                except TypeError:
                    #~ print('****************dist:',dist,'pn:**************************',pn)
                    continue

            #~ # If the tree is for a list of products, then examine the links to try to find the part number.
            #~ if tree.find('div', class_='srtnPageContainer') is not None:
            #~     if descend <= 0:
            #~         raise PartHtmlError
            #~     else:
            #~         # Look for the table of products.
            #~         products = tree.find('table',
            #~                              class_='productLister',
            #~                              id='sProdList').find_all('tr', class_='altRow')
            #~         # Extract the product links for the part numbers from the table.
            #~         product_links = []
            #~         for p in products:
            #~             try:
            #~                 product_links.append(
            #~                     p.find('td', class_='mftrPart').find('p', class_='wordBreak').a)
            #~             except AttributeError:
            #~                 continue
            #~         # Extract all the part numbers from the text portion of the links.
            #~         part_numbers = [l.text for l in product_links]
            #~         # Look for the part number in the list that most closely matches the requested part number.
            #~         match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]
            #~         # Now look for the link that goes with the closest matching part number.
            #~         for l in product_links:
            #~             if l.text == match:
            #~                 # Get the tree for the linked-to page and return that.
            #~                 return get_rs_part_html_tree(dist, pn, extra_search_terms,
            #~                                              url=l['href'], descend=descend-1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
def importieren(self, pfad = None, liste = None, ergaenzungsname = None, anzeigename_ergaenzen = False, nach_unten = False, force_gruppenname = None, force_scale = None, DBschema_erweitern = True): # Der Username der verwendet werden soll if len(auth_user_global) > 0: # Ist belegt auth_user = auth_user_global[0] else: auth_user = None self.iface.layerTreeView().setCurrentLayer(None) # None entspricht einem Null Pointer -> Auswahl wird entfernt -> nicht ausgewählt # Wird in der Regel verwendet wenn # Gemeindespezifische Daten geladen werden # zwecks Übersichtlichkeit self.anzeigename_aendern = anzeigename_ergaenzen self.gruppen_erg_name = ergaenzungsname # oberste Gruppe/Layer wird mit diesem Namen ergänzt! if pfad == "": return # Das Qgis Projektfile ist ein XML und wird # hier eingelesen try: #pfad = 'd:/delme.qgs' #xml = file(pfad).read() #QtWidgets.QMessageBox.about(None, "Fehler", str(locale.getpreferredencoding())) project_file = open(pfad,'r',-1,'UTF8') xml = project_file.read() d = QtXml.QDomDocument() d.setContent(xml) except IOError: QtWidgets.QMessageBox.about(None, "Fehler", "QGIS Projektdatei " + pfad + " nicht gefunden!") return # Die gewünschten Tagelemente aus dem XML herauslesen self.maps = d.elementsByTagName("maplayer") self.legends = d.elementsByTagName("legendlayer") self.gruppen = d.elementsByTagName("legendgroup") self.lyr = None self.joinlayerid = None #Zuerst den aktuellen Pfad auf dem #Qgis steht auslesen (kann z.B. ein lokaler Pfad sein #von dem ein Projekt geladen wird CurrentPath = QgsProject.instance().fileName() #Dann auf den jeweiligen Pfad setzen, von dem geladen wird. Sonst kann kein Projekt #mit absoluten Pfaden abgespeichert werden (für Layer die mit dem #VogisMenü geladen werden) QgsProject.instance().setFileName(pfad) #falls es länger dauert, ein kurzes Infofenster #für den Anwender progressi = QtWidgets.QProgressDialog('Lade Daten','Abbrechen',0,self.maps.length()) progressi.setFixedSize(350,90) btnCancel = QtWidgets.QPushButton() btnCancel.setText('Abbrechen') btnCancel.setFixedSize(70,30) progressi.setCancelButton(btnCancel) progressi.setWindowModality(1) #Schleife geht alle Layer die in der Legende aufscheinen durch. Hier #ist nämlich die reihenfolge festgelegt, wie sie in Qgis dargestellt werden #Diese Schleife brauch ich nur für die richtige Reihenfolge #der importierten Layer in Qgis zaehler = 0 # der Zähler für die Anzahl der geladenen Layer j = 0 #for j in range(self.legends.length(),-1,-1): for j in range(self.legends.length()): # Schleife geht alle Layer die in der maplayer tags aufscheinen durch # dort ist nämlich die wirkliche Information für die Darstellung im # Qgis. Also wird zuerst der Layer per ID in der Obigen # Schleife ausgewählt und dann in dieser Schleife im maplayertag # identifiziert # self.lyr=None for i in range(self.maps.length()): # prüfen ob der jeweilige layer nicht schon geladen ist. um das zu tun # müssen wir im vogis projektimport die identifikation über # die layerid tag machen. berücksichtigt werden muß auch # ob die layerid durch den ergaenzungsnamen erweitert wurde!! 
quelli = self.maps.item(i).namedItem("id").firstChild().toText().data() laden = True lyr_tmp = None for lyr_tmp in QgsProject.instance().mapLayers(): #alle bereits geladenen Layer durchgehen -> Dictionary #QtWidgets.QMessageBox.about(None, "Fehler", str(lyr_tmp)) if (ergaenzungsname == None) and (lyr_tmp == quelli): #Treffer: der Layer ist schon geladen laden = False if (ergaenzungsname != None) and (lyr_tmp == quelli + ergaenzungsname): #Treffer: der Layer ist schon geladen laden = False #Die Layerid ist in den legend tags und maplayer tags gleich #so kann ein layer genau identifiziert werden. ist laden zudem True #gehts also weiter if (self.maps.item(i).namedItem("id").firstChild().toText().data() == self.legends.item(j).namedItem("filegroup").namedItem("legendlayerfile").attributes().namedItem("layerid").nodeValue()) and laden: #ACHTUNG: Wieder aktivieren!!!!!!!!!! # wenn nur ein Teil der Layer eines Projekts geladen werden sollen. Die Liste enthält die # Namen dieser Layer if liste != None: brake_val = True for nd in range(len(liste)): if liste[nd] == self.legends.item(j).attributes().namedItem("name").nodeValue(): brake_val = False break if brake_val: continue # Nächster Layer, ist nicht auf der Liste # prüfen, ob der jeweilige Layer eine oder mehrere Jointabelle(n) verwendet self.joinlayerid = '' for sj in range(self.maps.item(i).namedItem("vectorjoins").childNodes().length()): # leider muss ich dann nochmals alles durchgehen.... for lj in range(self.maps.length()): if (self.maps.item(lj).namedItem("id").firstChild().toText().data() == self.maps.item(i).namedItem("vectorjoins").childNodes().item(sj).attributes().namedItem('joinLayerId').nodeValue()): self.joinlayerid = self.maps.item(i).namedItem("vectorjoins").childNodes().item(sj).attributes().namedItem('joinLayerId').nodeValue() #ACHTUNG: unbedingt den nodeValue der ID ändern wenn Gemeindeweise #geladen wird (DKM) Da in den Qgis Projekten der Gemeinden die jeweilig ID des Layers #der Einfachheit halber ident ist, würde so qgis den Layer nicht importieren!!! #So wie der Layername in der Darstellung geändert wird wird auch die ID des Nodes VOR #dem Laden geändert, damit Qgis das dann so übernimmt!! noddi = self.maps.item(i).namedItem("id") if ergaenzungsname != None: noddi.firstChild().setNodeValue(noddi.firstChild().nodeValue() + ergaenzungsname) #Abhängig von der vogisini wird das Encoding #aus der Projektdatei genommen oder CPG datei oder #wird auf System gesetzt #ist self.vogisEncoding == project dann werden die Einstellungen des Projekt verwendet base_name = os.path.dirname(pfad) + '/' + os.path.basename(self.maps.item(i).namedItem("datasource").firstChild().nodeValue()) # Achtung, zwischen absolutem und relativem Pfad unterscheiden if len(os.path.dirname(self.maps.item(i).namedItem("datasource").firstChild().nodeValue())) < 2: # relativer Pfad im QGIS Projekt! base_name = os.path.dirname(pfad) + '/' + os.path.basename(self.maps.item(i).namedItem("datasource").firstChild().nodeValue()) else: # absoluter Pfad im QGIS Projekt! 
base_name = self.maps.item(i).namedItem("datasource").firstChild().nodeValue() if vogisEncoding_global[0] == 'menue': # entweder CPG datei oder System setzen try: # gibts ein cpg datei datei = open(os.path.splitext(base_name)[0] + '.cpg','r') codierung_string = datei.read() datei.close() self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').setNodeValue(codierung_string) except IOError: # Es wird der Wert System zugewiesen self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').setNodeValue('System') # unbedingt ALLES DESELEKTIEREN, sonst Probleme mit der Reihenfolge self.iface.layerTreeView().setCurrentLayer(None) # None entspricht einem Null Pointer -> Auswahl wird entfernt -> nicht ausgewählt nv_ds = '' nv_provider = '' nv_encoding = '' ############################################################################# # Das Umschalten der Vektordaten auf die Geodatenbank - unter Bedingungen # es darf kein Layer aus einer Geodatenbank hier verwurschtelt werden ############################################################################# if self.maps.item(i).attributes().namedItem('type').nodeValue() == 'vector' and vogisDb_global[0] != 'filesystem geodaten' and self.maps.item(i).namedItem("datasource").firstChild().nodeValue().find('host') < 0: tablename = self.maps.item(i).namedItem("datasource").firstChild().nodeValue() sql = '' rc=[] db_ogr = '' # prüfen ob der layer eine shape datenquelle ist # und ob ein subset definiert ist if tablename.find('.shp') > 0 and (tablename.lower().find('subset') > 0 or tablename.lower().find('SUBSET') > 0 or tablename.lower().find('Subset') > 0): rc = textfilter_subset(self.maps.item(i).namedItem("datasource").firstChild().nodeValue()) tablename = rc[0] sql = rc[1] db_ogr = rc[0] else: tablename = os.path.basename(self.maps.item(i).namedItem("datasource").firstChild().nodeValue()).split('.shp')[0] db_ogr = tablename if ergaenzungsname != None and DBschema_erweitern: tablename = str.lower('\"' + ergaenzungsname + '\".\"' + tablename + '\"') else: tablename = str.lower('\"vorarlberg".\"' + tablename + '\"') # Sonderzeichen berücksichtigen! tablename = tablename.replace(('ä'),'ae') tablename = tablename.replace(('Ä'),'Ae') tablename = tablename.replace(('ö'),'oe') tablename = tablename.replace(('Ö'),'Oe') tablename = tablename.replace(('ü'),'ue') tablename = tablename.replace(('Ü'),'Ue') tablename = tablename.replace(('ß'),'ss') tablename = tablename.replace('. 
','_') ################################################ # Geometriespalte bestimmen -- geht nur mit OGR param_list = str.split(vogisDb_global[0]) host = '' dbname='' port='' for param in param_list: if str.find(param,'dbname') >= 0: dbname = str.replace(param,'dbname=','') elif str.find(param,'host=') >= 0: host = str.replace(param,'host=','') elif str.find(param,'port=') >= 0: port = str.replace(param,'port=','') try: if auth_user == None: outputdb = ogr.Open('pg: host=' + host + ' dbname=' + dbname + ' schemas=vorarlberg' + ' port=' + port) else: outputdb = ogr.Open('pg: host=' + host + ' dbname=' + dbname + ' schemas=vorarlberg' + ' port=' + port + ' user='******'the_geom' ################################################## # Geometriespalte Ende if self.maps.item(i).namedItem("datasource").firstChild().nodeValue().find('ogc_fid') > 0: # Achtung, das Attribut user darf nicht zwingend immer nur klein sein -> Siehe Usermapping in der Doku if auth_user == None: dbpath = str.lower(vogisDb_global[0] + ' sslmode=disable table=' + tablename + ' (' + geom_column + ') sql') + sql else: dbpath = str.lower(vogisDb_global[0]) + ' user='******' sslmode=disable table=' + tablename + ' (' + geom_column + ') sql') + sql else: # Achtung, das Attribut user darf nicht zwingend immer nur klein sein -> Siehe Usermapping in der Doku if auth_user == None: dbpath = str.lower(vogisDb_global[0] + ' sslmode=disable key=ogc_fid table=' + tablename + ' (' + geom_column + ') sql') + sql else: dbpath = str.lower(vogisDb_global[0]) + ' user='******' sslmode=disable key=ogc_fid table=' + tablename + ' (' + geom_column + ') sql') + sql nv_ds = self.maps.item(i).namedItem("datasource").firstChild().nodeValue() nv_provider = self.maps.item(i).namedItem("provider").firstChild().nodeValue() nv_encoding = self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').nodeValue() self.maps.item(i).namedItem("datasource").firstChild().setNodeValue(dbpath) self.maps.item(i).namedItem("provider").firstChild().setNodeValue('postgres') self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').setNodeValue('UTF-8') if os.path.abspath(os.path.dirname(__file__)) != path_global[0]: return # Layer einlesen! 
proj_read = QgsProject.instance().readLayer(self.maps.item(i)) # Der Fortschrittsbalken progressi.setValue(j) progressi.forceShow() if progressi.wasCanceled(): break #QtGui.QMessageBox.about(None, "Achtung", str(proj_read)) if not proj_read and vogisDb_global[0] == 'filesystem geodaten': # hier wird der Layer geladen und gemäß den Eintragungen # der DomNode auch gerendert und dargestellt QtWidgets.QMessageBox.about(None, "Achtung", "Layer " + self.legends.item(j).attributes().namedItem("name").nodeValue() + " nicht gefunden!") continue elif not proj_read and vogisDb_global[0] != 'filesystem geodaten': # Probieren auf Filesystem umzuschalten QtWidgets.QMessageBox.about(None, "Achtung", "Layer - " + self.legends.item(j).attributes().namedItem("name").nodeValue() + " - in der Datenbank nicht gefunden - es wird aufs Filesystem umgeschaltet") self.maps.item(i).namedItem("datasource").firstChild().setNodeValue(nv_ds) self.maps.item(i).namedItem("provider").firstChild().setNodeValue(nv_provider) self.maps.item(i).namedItem("provider").attributes().namedItem(nv_encoding) if not QgsProject.instance().readLayer(self.maps.item(i)): #Trotzdem nicht gefunden, wir geben auf QtWidgets.QMessageBox.about(None, "Achtung", "Layer " + self.legends.item(j).attributes().namedItem("name").nodeValue() + " nicht gefunden!") continue # den Anzeigenamen im Qgis ebenfalls ändern # dazu zuerst den richtigen Layer anhand der Layerid auswählen # leginterface = self.iface.legendInterface() #for lyr_tmp in leginterface.layers(): for lyr_tmp in QgsProject.instance().mapLayers(): #alle bereits geladenen Layer durchgehen -> Dictionary if lyr_tmp == noddi.firstChild().nodeValue(): self.lyr = QgsProject.instance().mapLayers()[lyr_tmp] if force_scale != None: self.lyr.setMaximumScale(25000) self.lyr.setScaleBasedVisibility(True) #Abhängig von der vogisini wird das KBS #aus der Projektdatei genommen oder aus dem *.prj File if vogisKBS_global[0] == 'menue': #Koordinatenbezugssystem aus dem prj file holen, wenn vorhanden, #und von dort zuweisen (die Projekteinstellung überschreiben) try: datei = open(os.path.splitext(self.lyr.source())[0] + '.prj','r') bezugssystem_string = datei.read() #falls kein sauberer EPSG String, machen wir eine Zuweisung für unser 31254 if (re.search('MGI\D+Austria\D+GK\D+West',bezugssystem_string, re.I)) != None: #Arcgis macht keinen sauberen EPSG String bezugssystem_crs = QgsCoordinateReferenceSystem() bezugssystem_crs.createFromSrid(31254) else: bezugssystem_crs = QgsCoordinateReferenceSystem(bezugssystem_string) datei.close() self.lyr.setCrs(bezugssystem_crs) except IOError: pass #dann in der Applikation registrieren #QgsMapLayerRegistry.instance().addMapLayer(self.lyr) # gejointe Tabellen brauchen eine Spezialbehandlung: Joininfo wird # ausgelesen, dann der join gelöscht und erst wenn alles geladen wurde # wieder neu erstellt. Sonst kann es Probleme geben! 
unterstütz # werden beleibig viele layer mit beliebig vielen joins # es handelt sich um einen layer mir midestens einem eingetragenen join single_lyr_join = lyr_join() # eigenes struktur objekt instanzieren if not self.joinlayerid == '': # checken ob für den layer mindestens ein join eingetragen ist single_lyr_join.joinlayer = self.lyr single_lyr_join.joininfo = self.lyr.vectorJoins() self.joinliste.append(single_lyr_join) # eine liste mit joinlayern und deren joininfo führen for rem_join in self.lyr.vectorJoins(): # für den joinlayer die joins entfernen - es können merhere sein kasperle = rem_join.joinLayerId self.lyr.removeJoin(str(rem_join.joinLayerId)) #Und nun noch den Layernamen für die Darstellung #im Qgis ergänzen. Siehe oben, bei gemeindeweisem Laden if (ergaenzungsname != None) and (self.lyr != None) and self.anzeigename_aendern: # noch ein boolean wegen der wasserwirtschaft!! if not (self.lyr.name().find(ergaenzungsname) > -1): # ACHTUNG: Sonst wird bei wiederholtem klicken der Name nochmal rangehängt if self.lyr.name().find("(a)") > -1: aktname = str.strip((self.lyr.name().rstrip("(a)"))) + "-" + ergaenzungsname + " (a)" self.lyr.setName(aktname) else: aktname = str.strip(self.lyr.name())+ "-" + ergaenzungsname self.lyr.setName(aktname) # abschließend schauen ob der aktiviert ist if (self.legends.item(j).attributes().namedItem("checked").nodeValue() == "Qt::Unchecked") and not (self.lyr is None): #leginterface.setLayerVisible(self.lyr,False) lyr_tree = QgsProject.instance().layerTreeRoot().findLayer(self.lyr) lyr_tree.setItemVisibilityChecked(False) index = QgsProject.instance().layerTreeRoot() zwetsch =QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id()) dummy = zwetsch.clone() # Die Layer die später geladen werden müssen # auch weiter unte in der Legende sein Reihenfolge) # das wird mit der Variable zaehler gesteuert # QGIS höher 2.6 index_ins = index_zuweisen(self.legends.item(j).attributes().namedItem("name").nodeValue(),self.legends.item(j).parentNode()) index.insertChildNode(-1,dummy) zaehler = zaehler + 1 zwetsch.parent().removeChildNode(zwetsch) # sonst gibts probleme in der Reihenfolge # wenn gruppen und layer im top level vermischt if not (self.legends.item(j).parentNode().nodeName() == "legendgroup") and (force_gruppenname is None): zwetsch =QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id()) dummy = zwetsch.clone() index.insertChildNode(index_ins,dummy) zwetsch.parent().removeChildNode(zwetsch) #abschließend schauen ob der Layer aufgeklappt ist #und das flag setzen if (self.legends.item(j).attributes().namedItem("open").nodeValue() == "false") and not (self.lyr is None): dummy.setExpanded(False) elif (self.legends.item(j).attributes().namedItem("open").nodeValue() == "true") and not (self.lyr is None): dummy.setExpanded(True) # hier könnte abgebrochen werden, wenn die layer einfach # nur reingeladen werden OHNE in Gruppenlyer abgelegt zu werden # continue ####################################################### # hier beginnt der Programmteil der die Gruppenlayer # behandelt - entweder wenn im Projektfile definiert # oder einfach wenn es im Menü # erwünscht wird ####################################################### if (self.legends.item(j).parentNode().nodeName() == "legendgroup") or not (force_gruppenname is None): self.gruppe_vorhanden = False #ACHTUNG: Layername und direkt übergeordneter Gruppenname #müssen sich unterscheiden, sonst kommts zu einem Fehler. 
Sollts #dennoch mal vorkommen, wird es hier abgefangen if self.legends.item(j).parentNode().attributes().namedItem("name").nodeValue() == self.legends.item(j).attributes().namedItem("name").nodeValue(): aktname = self.lyr.name() self.lyr.setName(aktname+"_") #prüfen ob die Gruppe schon angelegt ist grp_name = self.legends.item(j).parentNode().attributes().namedItem("name").nodeValue() #Name der Gruppe aus dem QGS Projektfile grp_obj = QgsProject.instance().layerTreeRoot().findGroup(grp_name) if (isinstance(grp_obj,QgsLayerTreeGroup)) and (not (grp_obj is None)): self.gruppe_vorhanden = True grp_name = force_gruppenname #Name ist übergeben worden grp_obj = QgsProject.instance().layerTreeRoot().findGroup(grp_name) if (isinstance(grp_obj,QgsLayerTreeGroup)) and (not (grp_obj is None)): self.gruppe_vorhanden = True ######################################################### # Gruppenlayer aus Projektdatei ######################################################### if self.legends.item(j).parentNode().attributes().namedItem("name").nodeValue() != "" and self.legends.item(j).parentNode().nodeName() == "legendgroup": QgsLayerTreeRegistryBridge(QgsProject.instance().layerTreeRoot(),QgsProject.instance()) kind = self.legends.item(j).parentNode() gruppen_hierarchie = pos_gruppe() gruppen_liste = [] while (kind.nodeName() == "legendgroup"): gruppen_hierarchie.name = kind.attributes().namedItem("name").nodeValue() # der name der dem layer unmittelbar übergeordnete Gruppe: Ebene gruppen_hierarchie.index = index_zuweisen(kind.attributes().namedItem("name").nodeValue(),kind.parentNode()) # Index der Darstellungsreihenfolge der Gruppe in ihrer Hierarchie gruppen_hierarchie.ex = kind.attributes().namedItem("open").nodeValue() gruppen_hierarchie.ch = kind.attributes().namedItem("checked").nodeValue() gruppen_liste.append(copy.deepcopy(gruppen_hierarchie)) # ACHTUNG: Referenz!! kind = kind.parentNode() # grp enthält das qtreewidgetitem Objekt der Gruppe!, in die der geladene # Layer verschoben werden soll! grp = sublayer(QgsProject.instance().layerTreeRoot(),gruppen_liste, self.gruppen_erg_name, nach_unten, anzeigename_ergaenzen)[0] #sollten es mehrere sein, immer nur die erste nehmen - siehe Erklärung beim Sub selbst zwtsch = QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id()) dummy = zwtsch.clone() if not (isinstance(grp,QgsLayerTreeGroup)) or grp is None: QtWidgets.QMessageBox.about(None, "ACHTUNG","Anlegen der Gruppe gescheitert") break index_layer = index_zuweisen(self.legends.item(j).attributes().namedItem("name").nodeValue(),self.legends.item(j).parentNode()) # QtGui.QMessageBox.about(None, "LayeriD", str(dummy.layerId())) grp.insertChildNode(index_layer,dummy) zwtsch.parent().removeChildNode(zwtsch) # zwilling entfernen! ########################################################## # hier Endet der Teil der Gruppenlayer aus Projektdatei!! ######################################################### letzterplatz = False #Flagvariable ermittelt ob die Gruppe ganz nach unten gehört #die gruppe in die der layer eingebettet ist kommt nicht aus #einem projekt, sondern wird erzwungen. 
hier gibts allerdings #nur eine ebene (was das ganze einfacher macht) if (not force_gruppenname is None): # gruppe anlegen gruppen_hierarchie = pos_gruppe() gruppen_hierarchie.name = force_gruppenname # grp = sublayer(QgsProject.instance().layerTreeRoot(),leginterface,[gruppen_hierarchie])[0] grp = sublayer(QgsProject.instance().layerTreeRoot(),[gruppen_hierarchie])[0] zwtsch = QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id()) #der geladene layer dummy = zwtsch.clone() # wiviele layer sind in der gruppe bereits vorhanden? # baum = QgsLayerTreeModel(grp) # anzahl_top_level_eintraege = baum.rowCount() baum = grp.findLayers() anzahl_top_level_eintraege = len(baum) baum = None # Sonst Absturz bei grp.parent().removeChildNode(grp) da baum auf ein Nichts refenrenziert! # den neuen ganz hinten einsetzen grp.insertChildNode(anzahl_top_level_eintraege,dummy) zwtsch.parent().removeChildNode(zwtsch) grp.setExpanded(False) if nach_unten: # ganz nach unten mit der gefüllten Gruppe, wenn das Flag gesetzt ist if not self.gruppe_vorhanden: dummy = grp.clone() QgsProject.instance().layerTreeRoot().insertChildNode(-1,dummy) grp.parent().removeChildNode(grp) else: # die Layer werden NICHT in einen self.gruppenlayer geladen # sollen aber nach unten verschoben werden if nach_unten: # wiviele layer sind in der gruppe bereits vorhanden? baum = QgsLayerTreeModel(QgsProject.instance().layerTreeRoot()) anzahl_top_level_eintraege = baum.rowCount() baum = None # Sonst Absturz bei grp.parent().removeChildNode(grp) da baum auf ein Nichts refenrenziert! zwtsch = QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id()) #der geladene layer dummy = zwtsch.clone() # den neuen ganz hinten einsetzen QgsProject.instance().layerTreeRoot().insertChildNode(anzahl_top_level_eintraege,dummy) zwtsch.parent().removeChildNode(zwtsch) # abschließend schauen ob der Layer aufgeklappt ist # und das flag setzen - beim Verschieben in die Gruppenlayer # verändert sich das nämlich manchmal... if (self.legends.item(j).attributes().namedItem("open").nodeValue() == "false") and not (self.lyr is None): dummy.setExpanded(False) elif (self.legends.item(j).attributes().namedItem("open").nodeValue() == "true") and not (self.lyr is None): dummy.setExpanded(True) # der nachfolgende Code erzwingt eine Aktualisierung # der Legende und des MapWindow # Ansonsten kanns im Mapwindow Darstellungsprobleme geben! Wieso?? if not self.lyr is None: anzeigename = self.lyr.name() self.lyr.setName(anzeigename+" ") self.lyr.setName(anzeigename) else: QtWidgets.QMessageBox.about(None, "Achtung", "Layer " + self.legends.item(j).attributes().namedItem("name").nodeValue() + " nicht gefunden!") # unbedingt ALLES DEselektieren, sonst Probleme mit Reihenfolge self.iface.layerTreeView().setCurrentLayer(None) # None entspricht einem Null Pointer -> Auswahl wird entfernt -> nicht ausgewählt #Unbedingt zurücksetzen sonst kanns beim wiederholten #laden des gleichen Projektfiles einen Fehler geben: #wenn nämlich die Schleife erneut beginnt, nicht lädt und self.lyr #beim vorherigen laden steht! self.lyr = None # und weiter in der Schleife! # UNBEDINGT am Schluss QGis wieder auf den usprünglichen # Pfad zurücksetzen QgsProject.instance().setFileName(CurrentPath) #ACHTUNG: Aus irgendeinem Grund gibts Probleme mit den Gruppenlayer: Wenn innerhalb der so angelegten Gruppen # ein Layer ausgewählt wird, gibts beim Laden danach einen Fehler. Es MUSS deshalb der oberste Eintrag # der Legende vor allem Laden als Aktueller Layer gesetzt werden!!! 
#Objekte besser löschen self.legends = None self.legendTree = None self.maps = None self.legends = None self.gruppen = None ###################################################################### # Abschlussprüfung: sind alle da #prüfen ob alle Layer der Liste geladen wurden #das ist notwendig, da ja beim Projektladen alles passen kann aber #ein Layer nicht vorhanden ist ###################################################################### fehler = 0 layerzaehler = 0 # Weg mit dem Fortschrittsbalken # self.info.close() if liste != None: #wenn nur ein Teil der Layer eines Projekts geladen wurde. Die Liste enthält die #Namen dieser Layer for nd in range(len(liste)): for lyr_tmp_id in QgsProject.instance().mapLayers(): #alle bereits geladenen Layer durchgehen -> Dictionary lyr_tmp = QgsProject.instance().mapLayer(lyr_tmp_id) # Unbedingt die optionale Änderung des # Anzeigenamens (z.B. DKM) mitberücksichtigen!) if (ergaenzungsname != None) and self.anzeigename_aendern: if liste[nd] + "-" + ergaenzungsname == lyr_tmp.name(): layerzaehler = layerzaehler +1 elif liste[nd].rstrip(" (a)") + "-" + ergaenzungsname + ' (a)' == lyr_tmp.name(): layerzaehler = layerzaehler +1 else: if liste[nd] == lyr_tmp.name(): layerzaehler = layerzaehler +1 # ACHTUNG: Wurden nicht alle in der Liste (fürs importieren übergebne Layerliste mit Layernamen) angeführten Layer # anhand des Layernamensim Projekt gefunden gibts # hier noch eine Fehlermeldung if not liste is None: if len(liste) > layerzaehler: #Ints! Dann wurde was nicht geladen QtWidgets.QMessageBox.about(None, "Achtung", "Nicht alle Layer aus " + pfad + " konnte(n) geladen werden!!") # gejointe Relationen wiederherstellen # aber erst ganz am Schluss!! for singlejoin in self.joinliste: for singlejoininfo in singlejoin.joininfo: singlejoin.joinlayer.addJoin(singlejoininfo)
def get_digikey_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None): '''Find the Digikey HTML page for a part number and return the URL and parse tree.''' def merge_price_tiers(main_tree, alt_tree): '''Merge the price tiers from the alternate-packaging tree into the main tree.''' try: insertion_point = main_tree.find('table', id='product-dollars').find('tr') for tr in alt_tree.find('table', id='product-dollars').find_all('tr'): insertion_point.insert_after(tr) except AttributeError: logger.log(DEBUG_OBSESSIVE, 'Problem merging price tiers for Digikey part {} with alternate packaging!'.format(pn)) def merge_qty_avail(main_tree, alt_tree): '''Merge the quantities from the alternate-packaging tree into the main tree.''' try: main_qty = get_digikey_qty_avail(main_tree) alt_qty = get_digikey_qty_avail(alt_tree) if main_qty is None: merged_qty = alt_qty elif alt_qty is None: merged_qty = main_qty else: merged_qty = max(main_qty, alt_qty) if merged_qty is not None: insertion_point = main_tree.find('td', id='quantityAvailable').find('span', id='dkQty') insertion_point.string = '{}'.format(merged_qty) except AttributeError: logger.log(DEBUG_OBSESSIVE, 'Problem merging available quantities for Digikey part {} with alternate packaging!'.format(pn)) # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = 'http://www.digikey.com/scripts/DkSearch/dksus.dll?WT.z_header=search_go&lang=en&keywords=' + urlquote( pn + ' ' + extra_search_terms, safe='') #url = 'http://www.digikey.com/product-search/en?KeyWords=' + urlquote(pn,safe='') + '&WT.z_header=search_go' elif url[0] == '/': url = 'http://www.digikey.com' + url # Open the URL, read the HTML from it, and parse it into a tree structure. req = FakeBrowser(url) for _ in range(HTML_RESPONSE_RETRIES): try: response = urlopen(req) html = response.read() break except WEB_SCRAPE_EXCEPTIONS: logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist)) else: # Couldn't get a good read from the website. logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]','',str.lower(pn)) not in re.sub('[\W_]','',str.lower(str(html))): logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist)) raise PartHtmlError # Use the following code if Javascript challenge pages are used to block scrapers. # try: # ghst = Ghost() # sess = ghst.start(plugins_enabled=False, download_images=False, show_scrollbars=False, javascript_enabled=False) # html, resources = sess.open(url) # print('type of HTML is {}'.format(type(html.content))) # html = html.content # except Exception as e: # print('Exception reading with Ghost: {}'.format(e)) try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then return it. if tree.find('div', class_='product-top-section') is not None: # Digikey separates cut-tape and reel packaging, so we need to examine more pages # to get all the pricing info. But don't descend any further if limit has been reached. if descend > 0: try: # Find all the URLs to alternate-packaging pages for this part. 
ap_urls = [ ap.find('li', class_='lnkAltPack').find_all('a')[-1].get('href') for ap in tree.find( 'div', class_='bota', id='additionalPackaging').find_all( 'ul', class_='more-expander-item') ] logger.log(DEBUG_OBSESSIVE,'Found {} alternate packagings for {} from {}'.format(len(ap_urls), pn, dist)) ap_trees_and_urls = [] # Initialize as empty in case no alternate packagings are found. try: ap_trees_and_urls = [get_digikey_part_html_tree(dist, pn, extra_search_terms, ap_url, descend=0) for ap_url in ap_urls] except Exception: logger.log(DEBUG_OBSESSIVE,'Failed to find alternate packagings for {} from {}'.format(pn, dist)) # Put the main tree on the list as well and then look through # the entire list for one that's non-reeled. Use this as the # main page for the part. ap_trees_and_urls.append((tree, url)) if digikey_part_is_reeled(tree): for ap_tree, ap_url in ap_trees_and_urls: if not digikey_part_is_reeled(ap_tree): # Found a non-reeled part, so use it as the main page. tree = ap_tree url = ap_url break # Done looking. # Now go through the other pages, merging their pricing and quantity # info into the main page. for ap_tree, ap_url in ap_trees_and_urls: if ap_tree is tree: continue # Skip examining the main tree. It already contains its info. try: # Merge the pricing info from that into the main parse tree to make # a single, unified set of price tiers... merge_price_tiers(tree, ap_tree) # and merge available quantity, using the maximum found. merge_qty_avail(tree, ap_tree) except AttributeError: logger.log(DEBUG_OBSESSIVE,'Problem merging price/qty for {} from {}'.format(pn, dist)) continue except AttributeError as e: logger.log(DEBUG_OBSESSIVE,'Problem parsing URLs from product page for {} from {}'.format(pn, dist)) return tree, url # Return the parse tree and the URL where it came from. # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('table', id='productTable') is not None: logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find( 'table', id='productTable').find('tbody').find_all('tr') # Extract the product links for the part numbers from the table. # Extract links for both manufacturer and catalog numbers. product_links = [p.find('td', class_='tr-mfgPartNumber').a for p in products] product_links.extend([p.find('td', class_='tr-dkPartNumber').a for p in products]) # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist)) return get_digikey_part_html_tree(dist, pn, extra_search_terms, url=l['href'], descend=descend - 1) # If the HTML contains a list of part categories, then give up. if tree.find('form', id='keywordSearchForm') is not None: logger.log(DEBUG_OBSESSIVE,'Found high-level part categories for {} from {}'.format(pn, dist)) raise PartHtmlError # I don't know what happened here, so give up. 
logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None, scrape_retries=2): '''Find the RS Components HTML page for a part number and return the URL and parse tree.''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = 'http://it.rs-online.com/web/c/?searchTerm=' + urlquote( pn + ' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'http://it.rs-online.com' + url elif url.startswith('..'): url = 'http://it.rs-online.com/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. for _ in range(scrape_retries): try: req = FakeBrowser(url) response = urlopen(req) html = response.read() break except WEB_SCRAPE_EXCEPTIONS: logger.log( DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist)) pass else: # Couldn't get a good read from the website. logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))): logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', class_='specTableContainer') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('div', class_='srtnPageContainer') is not None: logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find_all('tr', class_='resultRow') # Extract the product links for the part numbers from the table. product_links = [] for p in products: try: link = p.find('a', class_='primarySearchLink').get('href') if link is not None: product_links.append(link) # Up to now get the first url found in the list. i.e. do not choose the url based on the stock type (e.g. single unit, reel etc.) return get_part_html_tree( dist, pn, extra_search_terms, url=product_links[0], descend=descend - 1, scrape_retries=scrape_retries) except AttributeError: continue except TypeError: #~ print('****************dist:',dist,'pn:**************************',pn) continue #~ # If the tree is for a list of products, then examine the links to try to find the part number. #~ if tree.find('div', class_='srtnPageContainer') is not None: #~ if descend <= 0: #~ raise PartHtmlError #~ else: #~ # Look for the table of products. #~ products = tree.find('table', #~ class_='productLister', #~ id='sProdList').find_all('tr', #~ class_='altRow') #~ # Extract the product links for the part numbers from the table. #~ product_links = [] #~ for p in products: #~ try: #~ product_links.append( #~ p.find('td', #~ class_='mftrPart').find('p', #~ class_='wordBreak').a) #~ except AttributeError: #~ continue #~ # Extract all the part numbers from the text portion of the links. #~ part_numbers = [l.text for l in product_links] #~ # Look for the part number in the list that most closely matches the requested part number. 
#~ match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] #~ # Now look for the link that goes with the closest matching part number. #~ for l in product_links: #~ if l.text == match: #~ # Get the tree for the linked-to page and return that. #~ return get_part_html_tree(dist, pn, extra_search_terms, #~ url=l['href'], descend=descend-1, scrape_retries=scrape_retries) # I don't know what happened here, so give up. logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
def dist_get_part_html_tree(self, pn, extra_search_terms='', url=None, descend=2): '''@brief Find the farnell HTML page for a part number and return the URL and parse tree. @param pn Part number `str()`. @param extra_search_terms @param url @param descend @return (html `str()` of the page, url) ''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = 'http://it.farnell.com/Search?storeId=10165&catalogId=15001&categoryName=&selectedCategoryId=&langId=-4&categoryIdBox=&st=' \ + urlquote(pn, safe='') if extra_search_terms: url = url + urlquote(' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'http://www.farnell.com' + url elif url.startswith('..'): url = 'http://www.farnell.com/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. try: html = self.browser.scrape_URL(url) except: self.logger.log( DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, self.name)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))): self.logger.log( DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, self.name)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: self.logger.log( DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, self.name)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', class_='productDisplay', id='page') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('table', class_='productLister', id='sProdList') is not None: self.logger.log( DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, self.name)) if descend <= 0: self.logger.log( DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format( pn, self.name)) raise PartHtmlError else: # Look for the table of products. products = tree.find('table', class_='productLister').find_all( 'tr', class_='altRow') # Extract the product links for the part numbers from the table. product_links = [] for p in products: try: product_links.append( p.find('td', class_='mftrPart').find('a')) except AttributeError: continue #print('>>> ',pn,products,product_links)#TODO # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. self.logger.log( DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'. format(l.text.strip(), pn, self.name)) return self.dist_get_part_html_tree( pn, extra_search_terms, url=l.get('href', ''), descend=descend - 1) # I don't know what happened here, so give up. self.logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, self.name)) self.logger.log(DEBUG_HTTP_RESPONSES, 'Response was %s' % html) raise PartHtmlError
def getXYT(xyt_filename, match_only=False):
    # Read in a .fits or .npz file containing the output of the RHT.
    # If match_only is given a dictionary of keys:
    #     This will return whether ALL keys are found in the data of the given file.
    # Else:
    #     This will return the image coordinates of significant linearity, and the theta power spectrum at those coords.
    #     This will return as two integer arrays of some_length, and an ntheta*some_length array of theta power.
    if not os.path.isfile(xyt_filename):
        # Fast Failure Case - This file does not exist.
        if match_only:
            return False
        else:
            raise ValueError('Input xyt_filename in getXYT matches no existing file')
    else:
        # Attempts to extract header information for Matching, or else the data itself
        if xyt_filename.endswith('.npz'):
            # Allows very large files to be read in.
            data = np.load(xyt_filename, mmap_mode='r')
            if match_only:
                try:
                    return all([match_only[x] == data[str.lower(x)] for x in list(match_only.keys())])
                except KeyError:
                    return False
            Hi = data['hi']
            Hj = data['hj']
            Hthets = data['hthets']
        elif xyt_filename.endswith('.fits'):
            # Allows for reading in very large files!
            hdu_list = fits.open(xyt_filename, mode='readonly', memmap=True,
                                 save_backup=False, checksum=True)
            header = hdu_list[0].header
            if match_only:
                try:
                    return all([match_only[x] == header[str.upper(x)] for x in list(match_only.keys())])
                except KeyError:
                    return False
            data = hdu_list[1].data
            Hi = data['hi']
            Hj = data['hj']
            Hthets = data['hthets']
        else:
            raise ValueError('Supported input types in getXYT include .npz and .fits only')

    rebuild = None
    # Formats output properly
    if rebuild and filepath is not None:
        # Can recreate an entire 3D array of mostly 0s.
        data = getData(filepath)
        datay, datax = data.shape
        ntheta = Hthets[0].shape
        if BUFFER:
            xyt = np.memmap(tempfile.TemporaryFile(), dtype=DTYPE, mode='w+',
                            shape=(datay, datax, ntheta))
            xyt.fill(0.0)
        else:
            print('Warning: Reconstructing very large array in memory! Set BUFFER to True!')
            xyt = np.zeros((datay, datax, ntheta))
        coords = list(zip(Hj, Hi))
        for c in range(len(coords)):
            j, i = coords[c]
            xyt[j, i, :] = Hthets[c]
        return xyt
    else:
        # Returns the sparse, memory mapped form only.
        return Hi, Hj, Hthets
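A hypothetical call pattern for the reader above; the file name and the match_only keys are made up for illustration, and getXYT is assumed to be importable from the module that defines it:

    # Sparse RHT output: pixel coordinates plus per-pixel theta power spectra.
    hi, hj, hthets = getXYT('example_xyt.fits')

    # Check whether the file was produced with a given (hypothetical) parameter set.
    same_params = getXYT('example_xyt.fits', match_only={'wlen': 55, 'smr': 15, 'frac': 0.70})
    print(len(hi), same_params)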
def get_digikey_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None): '''Find the Digikey HTML page for a part number and return the URL and parse tree.''' def merge_price_tiers(main_tree, alt_tree): '''Merge the price tiers from the alternate-packaging tree into the main tree.''' try: insertion_point = main_tree.find('table', id='product-dollars').find('tr') for tr in alt_tree.find('table', id='product-dollars').find_all('tr'): insertion_point.insert_after(tr) except AttributeError: logger.log( DEBUG_OBSESSIVE, 'Problem merging price tiers for Digikey part {} with alternate packaging!' .format(pn)) pass def merge_qty_avail(main_tree, alt_tree): '''Merge the quantities from the alternate-packaging tree into the main tree.''' try: main_qty = get_digikey_qty_avail(main_tree) alt_qty = get_digikey_qty_avail(alt_tree) if main_qty is None: merged_qty = alt_qty elif alt_qty is None: merged_qty = main_qty else: merged_qty = max(main_qty, alt_qty) if merged_qty is not None: insertion_point = main_tree.find( 'td', id='quantityAvailable').find('span', id='dkQty') insertion_point.string = '{}'.format(merged_qty) except AttributeError: logger.log( DEBUG_OBSESSIVE, 'Problem merging available quantities for Digikey part {} with alternate packaging!' .format(pn)) pass # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = 'http://www.digikey.com/scripts/DkSearch/dksus.dll?WT.z_header=search_go&lang=en&keywords=' + urlquote( pn + ' ' + extra_search_terms, safe='') #url = 'http://www.digikey.com/product-search/en?KeyWords=' + urlquote(pn,safe='') + '&WT.z_header=search_go' elif url[0] == '/': url = 'http://www.digikey.com' + url # Open the URL, read the HTML from it, and parse it into a tree structure. req = FakeBrowser(url) for _ in range(HTML_RESPONSE_RETRIES): try: response = urlopen(req) html = response.read() break except WEB_SCRAPE_EXCEPTIONS: logger.log( DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist)) pass else: # Couldn't get a good read from the website. logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))): logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist)) raise PartHtmlError # Use the following code if Javascript challenge pages are used to block scrapers. # try: # ghst = Ghost() # sess = ghst.start(plugins_enabled=False, download_images=False, show_scrollbars=False, javascript_enabled=False) # html, resources = sess.open(url) # print('type of HTML is {}'.format(type(html.content))) # html = html.content # except Exception as e: # print('Exception reading with Ghost: {}'.format(e)) try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then return it. if tree.find('div', class_='product-top-section') is not None: # Digikey separates cut-tape and reel packaging, so we need to examine more pages # to get all the pricing info. But don't descend any further if limit has been reached. if descend > 0: try: # Find all the URLs to alternate-packaging pages for this part. 
ap_urls = [ ap.find('li', class_='lnkAltPack').find_all('a')[-1].get('href') for ap in tree.find( 'div', class_='bota', id='additionalPackaging'). find_all('ul', class_='more-expander-item') ] logger.log( DEBUG_OBSESSIVE, 'Found {} alternate packagings for {} from {}'.format( len(ap_urls), pn, dist)) try: ap_trees_and_urls = [ get_digikey_part_html_tree(dist, pn, extra_search_terms, ap_url, descend=0) for ap_url in ap_urls ] except Exception: logger.log( DEBUG_OBSESSIVE, 'Failed to find alternate packagings for {} from {}'. format(pn, dist)) # Put the main tree on the list as well and then look through # the entire list for one that's non-reeled. Use this as the # main page for the part. ap_trees_and_urls.append((tree, url)) if digikey_part_is_reeled(tree): for ap_tree, ap_url in ap_trees_and_urls: if not digikey_part_is_reeled(ap_tree): # Found a non-reeled part, so use it as the main page. tree = ap_tree url = ap_url break # Done looking. # Now go through the other pages, merging their pricing and quantity # info into the main page. for ap_tree, ap_url in ap_trees_and_urls: if ap_tree is tree: continue # Skip examining the main tree. It already contains its info. try: # Merge the pricing info from that into the main parse tree to make # a single, unified set of price tiers... merge_price_tiers(tree, ap_tree) # and merge available quantity, using the maximum found. merge_qty_avail(tree, ap_tree) except AttributeError: logger.log( DEBUG_OBSESSIVE, 'Problem merging price/qty for {} from {}'.format( pn, dist)) continue except AttributeError: logger.log( DEBUG_OBSESSIVE, 'Problem parsing URLs from product page for {} from {}'. format(pn, dist)) pass return tree, url # Return the parse tree and the URL where it came from. # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('table', id='productTable') is not None: logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find( 'table', id='productTable').find('tbody').find_all('tr') # Extract the product links for the part numbers from the table. # Extract links for both manufacturer and catalog numbers. product_links = [ p.find('td', class_='tr-mfgPartNumber').a for p in products ] product_links.extend( [p.find('td', class_='tr-dkPartNumber').a for p in products]) # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. logger.log( DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'. format(l.text, pn, dist)) return get_digikey_part_html_tree(dist, pn, extra_search_terms, url=l['href'], descend=descend - 1) # If the HTML contains a list of part categories, then give up. if tree.find('form', id='keywordSearchForm') is not None: logger.log( DEBUG_OBSESSIVE, 'Found high-level part categories for {} from {}'.format(pn, dist)) raise PartHtmlError # I don't know what happened here, so give up. logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
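merge_qty_avail above boils down to one rule: across the main page and any alternate-packaging pages, keep the larger available quantity and tolerate pages where none could be parsed. A minimal sketch of just that rule; merged_quantity is an illustrative name, not part of the scraper.

def merged_quantity(main_qty, alt_qty):
    '''Combine two scraped 'quantity available' values; either may be None.'''
    if main_qty is None:
        return alt_qty
    if alt_qty is None:
        return main_qty
    return max(main_qty, alt_qty)

# merged_quantity(1200, None) -> 1200; merged_quantity(1200, 4500) -> 4500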
def get_farnell_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None): '''Find the farnell HTML page for a part number and return the URL and parse tree.''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: # url = 'http://www.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15003&langId=-1&storeId=10194&gs=true&st=' + urlquote( # pn + ' ' + extra_search_terms, # safe='') url = 'http://it.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15001&langId=-4&storeId=10165&gs=true&st=' + urlquote( pn + ' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'http://www.farnell.com' + url elif url.startswith('..'): url = 'http://www.farnell.com/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. for _ in range(HTML_RESPONSE_RETRIES): try: req = FakeBrowser(url) response = urlopen(req) html = response.read() break except WEB_SCRAPE_EXCEPTIONS: logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist)) pass else: # Couldn't get a good read from the website. logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]','',str.lower(pn)) not in re.sub('[\W_]','',str.lower(str(html))): logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', class_='productDisplay', id='page') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('table', class_='productLister', id='sProdList') is not None: logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find('table', class_='productLister', id='sProdList').find_all('tr', class_='altRow') # Extract the product links for the part numbers from the table. product_links = [] for p in products: try: product_links.append( p.find('td', class_='mftrPart').find('p', class_='wordBreak').a) except AttributeError: continue # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist)) return get_farnell_part_html_tree(dist, pn, extra_search_terms, url=l['href'], descend=descend-1) # I don't know what happened here, so give up. logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
def direk_laden(PGdb, lyr_name, shapename, pfad, iface, subset = None):
    # The username that should be used
    if len(auth_user_global) > 0:
        # Is set
        auth_user = auth_user_global[0]
    else:
        auth_user = None
    iface.layerTreeView().setCurrentLayer(None) # So that we start from the very outside of the legend!
    try:
        db = PGdb
        shapename_ohne_suffix = shapename.replace('.shp','')
        shapename_ohne_suffix = str(str.strip(str.lower(shapename_ohne_suffix)))
        if db != None:
            try: # Geodatabase
                ################################################
                # Determine the geometry column -- only works with OGR
                try:
                    if auth_user == None:
                        outputdb = ogr.Open('pg: host =' + db.hostName() + ' dbname =' + db.databaseName() + ' schemas=' + schema + ' port=' + str(db.port()))
                    else:
                        # (Credentials and the geometry-column lookup are redacted in the source; only the fallback value 'the_geom' survives.)
                        outputdb = ogr.Open('pg: host =' + db.hostName() + ' dbname =' + db.databaseName() + ' schemas=' + schema + ' port=' + str(db.port()) + ' user='******'the_geom'
                ################################################
                # Loading the data
                uri = QgsDataSourceUri()
                uri.setConnection(db.hostName(),str(db.port()),db.databaseName(),'','')
                if not auth_user == None:
                    uri.setUsername(auth_user)
                uri.setDataSource('vorarlberg', shapename_ohne_suffix, geom_column)
                erg_lyr = QgsVectorLayer(uri.uri(), lyr_name,"postgres")
                # check whether it loaded successfully
                if not erg_lyr.isValid(): # not successful
                    QtWidgets.QMessageBox.about(None, "Error", "Layer " + shapename_ohne_suffix + " not found in the database - switching to the filesystem")
                    erg_lyr = QgsVectorLayer(pfad + '/' + shapename, lyr_name,"ogr")
            except Exception: # even worse
                QtWidgets.QMessageBox.about(None, "Error", "Layer " + shapename_ohne_suffix + " not found in the database - switching to the filesystem")
                erg_lyr = QgsVectorLayer(pfad + '/' + shapename, lyr_name,"ogr")
        elif db == None:
            erg_lyr = QgsVectorLayer(pfad + '/' + shapename, lyr_name,"ogr")
        # The attribute-based selection here
        if subset != None:
            erg_lyr.setSubsetString(subset)
        # check whether anything meaningful could be loaded
        if erg_lyr.isValid():
            return erg_lyr
        else:
            QtWidgets.QMessageBox.about(None, "Error", "Layer " + shapename + " could not be loaded")
            return None
    except Exception as b:
        return None
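A hedged sketch of the PostGIS branch taken above, using the same QGIS calls (QgsDataSourceUri, QgsVectorLayer) but with placeholder host, database, table and fallback paths; in the plugin these come from the PGdb connection object and the shapefile arguments.

from qgis.core import QgsDataSourceUri, QgsVectorLayer

uri = QgsDataSourceUri()
uri.setConnection('localhost', '5432', 'gisdb', '', '')   # credentials left to the QGIS auth system
uri.setDataSource('vorarlberg', 'gemeinden', 'the_geom')  # schema, table, geometry column
layer = QgsVectorLayer(uri.uri(), 'Gemeinden', 'postgres')
if not layer.isValid():
    # Fall back to the shapefile on the filesystem, as the function does.
    layer = QgsVectorLayer('/data/gemeinden.shp', 'Gemeinden', 'ogr')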
def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None): '''Find the TME HTML page for a part number and return the URL and parse tree.''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = 'https://www.tme.eu/en/katalog/?search=' + urlquote( pn + ' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'https://www.tme.eu' + url # Open the URL, read the HTML from it, and parse it into a tree structure. req = FakeBrowser(url) for _ in range(HTML_RESPONSE_RETRIES): try: response = urlopen(req) html = response.read() break except WEB_SCRAPE_EXCEPTIONS: logger.log( DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist)) pass else: # Couldn't get a good read from the website. logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))): logger.log( DEBUG_OBSESSIVE, 'No part number {} in HTML page from {} ({})'.format( pn, dist, url)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', id='ph') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('table', id="products") is not None: logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find('table', id="products").find_all( 'tr', class_=('product-row')) # Extract the product links for the part numbers from the table. product_links = [] for p in products: for a in p.find('td', class_='product').find_all('a'): product_links.append(a) # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if (not l['href'].startswith('./katalog')) and l.text == match: # Get the tree for the linked-to page and return that. logger.log( DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'. format(l.text, pn, dist)) # TODO: The current implementation does up to four HTTP # requests per part (search, part details page for TME P/N, # XHR for pricing information, and XHR for stock # availability). This is mainly for the compatibility with # other distributor implementations (html_tree gets passed # to all functions). # A modified implementation (which would pass JSON data # obtained by the XHR instead of the HTML DOM tree) might be # able to do the same with just two requests (search for TME # P/N, XHR for pricing and stock availability). return get_part_html_tree(dist, pn, extra_search_terms, url=l['href'], descend=descend - 1) # I don't know what happened here, so give up. logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
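The retry idiom shared by these scrapers relies on Python's for/else: the else clause only runs when every attempt finished without a break. A minimal sketch with plain urllib standing in for the FakeBrowser wrapper; fetch and the RuntimeError are illustrative.

from urllib.request import urlopen
from urllib.error import URLError

def fetch(url, retries=3):
    for _ in range(retries):
        try:
            html = urlopen(url).read()
            break                       # got a usable response
        except URLError:
            continue                    # transient failure, try again
    else:
        # Loop finished without break, i.e. every attempt failed.
        raise RuntimeError('no usable response from {}'.format(url))
    return html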
def get_farnell_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None): '''Find the farnell HTML page for a part number and return the URL and parse tree.''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: # url = 'http://www.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15003&langId=-1&storeId=10194&gs=true&st=' + urlquote( # pn + ' ' + extra_search_terms, # safe='') url = 'http://it.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15001&langId=-4&storeId=10165&gs=true&st=' + urlquote( pn + ' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'http://www.farnell.com' + url elif url.startswith('..'): url = 'http://www.farnell.com/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. for _ in range(HTML_RESPONSE_RETRIES): try: req = FakeBrowser(url) response = urlopen(req) html = response.read() break except WEB_SCRAPE_EXCEPTIONS: logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist)) pass else: # Couldn't get a good read from the website. logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]','',str.lower(pn)) not in re.sub('[\W_]','',str.lower(str(html))): logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', class_='productDisplay', id='page') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('table', class_='productLister', id='sProdList') is not None: logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find('table', class_='productLister', id='sProdList').find_all('tr', class_='altRow') # Extract the product links for the part numbers from the table. product_links = [] for p in products: try: product_links.append(p.find('td', class_='mftrPart').find('a')) except AttributeError: continue # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist)) return get_farnell_part_html_tree(dist, pn, extra_search_terms, url=l['href'], descend=descend-1) # I don't know what happened here, so give up. logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
def dist_get_part_html_tree(self, pn, extra_search_terms='', url=None, descend=2): '''@brief Find the RS Components HTML page for a part number and return the URL and parse tree. @param pn Part number `str()`. @param extra_search_terms @param url @param descend @return (html `str()` of the page, url) ''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = 'http://it.rs-online.com/web/c/?searchTerm=' + urlquote( pn, safe='') if extra_search_terms: url = url + urlquote(' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'http://it.rs-online.com' + url elif url.startswith('..'): url = 'http://it.rs-online.com/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. try: html = self.browser.scrape_URL(url) except: self.logger.log( DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, self.name)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: self.logger.log( DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, self.name)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))): self.logger.log( DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, self.name)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', class_='advLineLevelContainer'): return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('div', class_=('resultsTable', 'results-table-container')) is not None: self.logger.log( DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, self.name)) if descend <= 0: self.logger.log( DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format( pn, self.name)) raise PartHtmlError else: # Look for the table of products. products = tree.find('table', id='results-table').find_all( 'tr', class_='resultRow') # Extract the product links for the part numbers from the table. product_links = [ p.find('a', class_='product-name').get('href') for p in products ] # Extract all the part numbers from the text portion of the links. part_numbers = [ p.find('span', class_='text-contents').get_text() for p in products ] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for i in range(len(product_links)): if part_numbers[i] == match: # Get the tree for the linked-to page and return that. self.logger.log( DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'. format(part_numbers[i], pn, self.name)) return self.dist_get_part_html_tree( pn, extra_search_terms, url=product_links[i], descend=descend - 1) # I don't know what happened here, so give up. self.logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, self.name)) self.logger.log(DEBUG_HTTP_RESPONSES, 'Response was %s' % html) raise PartHtmlError
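Every scraper in this file performs the same sanity check before parsing: strip everything except letters and digits from both the part number and the page, lower-case them, and test containment so punctuation and whitespace differences cannot cause a false miss. A standalone sketch; pn_in_html is an illustrative name.

import re

def pn_in_html(pn, html):
    def squash(s):
        return re.sub(r'[\W_]', '', s.lower())
    return squash(pn) in squash(html)

# pn_in_html('LM358-N/NOPB', '<td>LM358 N NOPB</td>') -> True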
def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None, scrape_retries=2): '''@brief Find the Mouser HTML page for a part number and return the URL and parse tree. @param dist @param pn Part number `str()`. @param extra_search_terms @param url @param descend @param local_part_html @param scrape_retries `int` Quantity of retries in case of fail. @return (html `str()` of the page, url) ''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = 'https://www.mouser.com/Search/Refine.aspx?Keyword=' + urlquote( pn + ' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'https://www.mouser.com' + url elif url.startswith('..'): url = 'https://www.mouser.com/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. req = FakeBrowser(url) req.add_header('Cookie', 'preferences=ps=www2&pl=en-US&pc_www2=USDe') for _ in range(scrape_retries): try: response = urlopen(req) html = response.read() break except WEB_SCRAPE_EXCEPTIONS: logger.log( DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist)) pass else: # Couldn't get a good read from the website. logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))): logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', id='pdpPricingAvailability') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('div', id='searchResultsTbl') is not None: logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find('table', class_='SearchResultsTable').find_all( 'tr', class_=('SearchResultsRowOdd', 'SearchResultsRowEven')) # Extract the product links for the part numbers from the table. product_links = [ p.find('div', class_='mfrDiv').a for p in products ] # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. logger.log( DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'. format(l.text, pn, dist)) return get_part_html_tree(dist, pn, extra_search_terms, url=l.get('href', ''), descend=descend - 1, scrape_retries=scrape_retries) # I don't know what happened here, so give up. logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
def dist_get_part_html_tree(self, pn, extra_search_terms='', url=None, descend=2): '''@brief Find the Mouser HTML page for a part number and return the URL and parse tree. @param pn Part number `str()`. @param extra_search_terms @param url @param descend @return (html `str()` of the page, url) ''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = distributor_dict[self.name]['site']['url'] + \ '/Search/Refine.aspx?Keyword=' + urlquote(pn, safe='') if extra_search_terms: url = url + urlquote(' ' + extra_search_terms, safe='') elif url[0] == '/': url = distributor_dict[self.name]['site']['url'] + url elif url.startswith('..'): url = distributor_dict[self.name]['site']['url'] + '/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. try: html = self.browser.scrape_URL(url) except Exception as ex: self.logger.log( DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, self.name)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))): self.logger.log( DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, self.name)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: self.logger.log( DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, self.name)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', id='pdpPricingAvailability') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('div', id='searchResultsTbl') is not None: self.logger.log( DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, self.name)) if descend <= 0: self.logger.log( DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format( pn, self.name)) raise PartHtmlError else: # Look for the table of products. products = tree.find('table', class_='SearchResultsTable').find_all( 'tr', class_=('SearchResultsRowOdd', 'SearchResultsRowEven')) # Extract the product links for the part numbers from the table. product_links = [ p.find('div', class_='mfrDiv').a for p in products ] # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. self.logger.log( DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'. format(l.text, pn, self.name)) return self.dist_get_part_html_tree( pn, extra_search_terms, url=l.get('href', ''), descend=descend - 1) # I don't know what happened here, so give up. self.logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, self.name)) self.logger.log(DEBUG_HTTP_RESPONSES, 'Response was %s' % html) raise PartHtmlError
def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None, scrape_retries=2): '''@brief Find the farnell HTML page for a part number and return the URL and parse tree. @param dist @param pn Part number `str()`. @param extra_search_terms @param url @param descend @param local_part_html @param scrape_retries `int` Quantity of retries in case of fail. @return (html `str()` of the page, url) ''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: # url = 'http://www.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15003&langId=-1&storeId=10194&gs=true&st=' + urlquote( # pn + ' ' + extra_search_terms, # safe='') url = 'http://it.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15001&langId=-4&storeId=10165&gs=true&st=' + urlquote( pn + ' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'http://www.farnell.com' + url elif url.startswith('..'): url = 'http://www.farnell.com/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. try: html = fake_browser(url, scrape_retries) except: logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]', '', str.lower(pn)) not in re.sub('[\W_]', '', str.lower(str(html))): logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', class_='productDisplay', id='page') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('table', class_='productLister', id='sProdList') is not None: logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find('table', class_='productLister', id='sProdList').find_all('tr', class_='altRow') # Extract the product links for the part numbers from the table. product_links = [] for p in products: try: product_links.append( p.find('td', class_='mftrPart').find('a')) except AttributeError: continue # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. logger.log( DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'. format(l.text.strip(), pn, dist)) return get_part_html_tree(dist, pn, extra_search_terms, url=l.get('href', ''), descend=descend - 1, scrape_retries=scrape_retries) # I don't know what happened here, so give up. logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
def get_mouser_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None): '''Find the Mouser HTML page for a part number and return the URL and parse tree.''' # Use the part number to lookup the part using the site search function, unless a starting url was given. if url is None: url = 'http://www.mouser.com/Search/Refine.aspx?Keyword=' + urlquote( pn + ' ' + extra_search_terms, safe='') elif url[0] == '/': url = 'http://www.mouser.com' + url elif url.startswith('..'): url = 'http://www.mouser.com/Search/' + url # Open the URL, read the HTML from it, and parse it into a tree structure. req = FakeBrowser(url) req.add_header('Cookie', 'preferences=ps=www2&pl=en-US&pc_www2=USDe') for _ in range(HTML_RESPONSE_RETRIES): try: response = urlopen(req) html = response.read() break except WEB_SCRAPE_EXCEPTIONS: logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist)) pass else: # Couldn't get a good read from the website. logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist)) raise PartHtmlError # Abort if the part number isn't in the HTML somewhere. # (Only use the numbers and letters to compare PN to HTML.) if re.sub('[\W_]','',str.lower(pn)) not in re.sub('[\W_]','',str.lower(str(html))): logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist)) raise PartHtmlError try: tree = BeautifulSoup(html, 'lxml') except Exception: logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist)) raise PartHtmlError # If the tree contains the tag for a product page, then just return it. if tree.find('div', id='product-details') is not None: return tree, url # If the tree is for a list of products, then examine the links to try to find the part number. if tree.find('table', class_='SearchResultsTable') is not None: logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist)) if descend <= 0: logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist)) raise PartHtmlError else: # Look for the table of products. products = tree.find( 'table', class_='SearchResultsTable').find_all( 'tr', class_=('SearchResultsRowOdd', 'SearchResultsRowEven')) # Extract the product links for the part numbers from the table. product_links = [p.find('div', class_='mfrDiv').a for p in products] # Extract all the part numbers from the text portion of the links. part_numbers = [l.text for l in product_links] # Look for the part number in the list that most closely matches the requested part number. match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0] # Now look for the link that goes with the closest matching part number. for l in product_links: if l.text == match: # Get the tree for the linked-to page and return that. logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist)) return get_mouser_part_html_tree(dist, pn, extra_search_terms, url=l['href'], descend=descend-1) # I don't know what happened here, so give up. logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist)) raise PartHtmlError
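The two Mouser scrapers send a preference cookie so the site serves US-English pages with USD pricing. The cookie string below is copied from the code above; FakeBrowser is assumed to wrap urllib.request.Request, which plain Request stands in for here, and the query URL is illustrative.

from urllib.request import Request, urlopen

url = 'https://www.mouser.com/Search/Refine.aspx?Keyword=LM358'
req = Request(url)
req.add_header('Cookie', 'preferences=ps=www2&pl=en-US&pc_www2=USDe')
# html = urlopen(req).read()   # then parse with BeautifulSoup as above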