Example #1
from difflib import get_close_matches  # needed by the helper below

def lc_get_close_matches(lbl, possibilities, num_matches=3, cutoff=0.6):
    '''Return list of closest matches to lbl from possibilities (case-insensitive).'''

    if USING_PYTHON2:
        lc_lbl = str.lower(unicode(lbl))
        lc_possibilities = [str.lower(unicode(p)) for p in possibilities]
    else:
        lc_lbl = str.lower(lbl)
        lc_possibilities = [str.lower(p) for p in possibilities]
    lc_matches = get_close_matches(lc_lbl, lc_possibilities, num_matches, cutoff)
    return [possibilities[lc_possibilities.index(m)] for m in lc_matches]
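
A quick usage sketch of the helper above (assuming Python 3, so USING_PYTHON2 is false, and get_close_matches coming from difflib; the sample labels are made up). The wrapper matches case-insensitively but returns the originally-cased candidates:

from difflib import get_close_matches

USING_PYTHON2 = False  # assumed here; the original snippet defines this elsewhere

# ...lc_get_close_matches defined as above...

labels = ['Resistor', 'Capacitor', 'Inductor']
print(lc_get_close_matches('CAPACITER', labels))   # -> ['Capacitor']
print(lc_get_close_matches('resistor', labels))    # -> ['Resistor']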
Example #2
def lc_get_close_matches(lbl, possibilities, num_matches=3, cutoff=0.6):
    '''Return list of closest matches to lbl from possibilities (case-insensitive).'''

    # Strip any non-strings so str.lower() doesn't crash.
    # (basestring only exists on Python 2, so pick the right type check.)
    string_types = basestring if USING_PYTHON2 else str
    possibilities = [p for p in possibilities if isinstance(p, string_types)]

    if USING_PYTHON2:
        lc_lbl = str.lower(unicode(lbl))
        lc_possibilities = [str.lower(unicode(p)) for p in possibilities]
    else:
        lc_lbl = str.lower(lbl)
        lc_possibilities = [str.lower(p) for p in possibilities]
    lc_matches = get_close_matches(lc_lbl, lc_possibilities, num_matches, cutoff)
    return [possibilities[lc_possibilities.index(m)] for m in lc_matches]
Example #3
    def _sign(self, args):
        params = sorted(args.items(), key=lambda k: str.lower(k[0]))
        hash_str = "&".join(
            ["=".join(
                [str.lower(r[0]),
                 str.lower(
                     urllib.parse.quote_plus(str(r[1]))
                 ).replace("+", "%20")]
            ) for r in params]
        )
        # base64.encodestring was removed in Python 3.9; encodebytes is its replacement
        signature = base64.encodebytes(hmac.new(self.api_secret.encode('utf-8'),
                                                hash_str.encode('utf-8'),
                                                hashlib.sha1).digest()).strip()
        self.signature = signature
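
For clarity, the same scheme as a standalone function: sort the parameters case-insensitively, build a lower-cased, URL-encoded query string, HMAC-SHA1 it, and Base64 the digest. A sketch only (the function name and sample values are hypothetical; base64.b64encode is used since a 20-byte digest fits on one line anyway):

import base64
import hashlib
import hmac
import urllib.parse

def sign_params(params, api_secret):
    """Sort, lower-case and URL-encode the params, then HMAC-SHA1 sign them."""
    pairs = sorted(params.items(), key=lambda kv: kv[0].lower())
    hash_str = "&".join(
        "{}={}".format(k.lower(),
                       urllib.parse.quote_plus(str(v)).lower().replace("+", "%20"))
        for k, v in pairs
    )
    digest = hmac.new(api_secret.encode('utf-8'),
                      hash_str.encode('utf-8'),
                      hashlib.sha1).digest()
    return base64.b64encode(digest).strip()

print(sign_params({'command': 'listUsers', 'apikey': 'XYZ'}, 'secret'))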
Example #4
    def __init__(self, logfile, instance, namespace, location,
                 remote_conn_details):
        super(ConnectMUMPS, self).__init__()

        self.type = str.lower(instance)
        self.namespace = str.upper(namespace)
        self.prompt = self.namespace + '>'

        # Create a new SSH client object
        client = paramiko.SSHClient()

        # Set SSH key parameters to auto accept unknown hosts
        client.load_system_host_keys()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # Connect to the host
        client.connect(hostname=remote_conn_details.remote_address,
                       port=remote_conn_details.remote_port,
                       username=remote_conn_details.username,
                       password=remote_conn_details.password)

        # Create a client interaction class which will interact with the host
        from paramikoe import SSHClientInteraction  # 'paramikoe' is an older name of the paramiko-expect package
        interact = SSHClientInteraction(client, timeout=10, display=False)
        self.connection = interact
        self.connection.logfile_read = open(logfile, 'w')
        self.client = client  # apparently there is a destructor which disconnects (probably sends a FIN packet) when the client is gone
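
A minimal interaction sketch with the same stack (the host, credentials, and the 'VISTA>' prompt are placeholders, and the expect/send/current_output_clean API is assumed from the paramiko-expect package):

import paramiko
from paramiko_expect import SSHClientInteraction  # modern name of 'paramikoe'

client = paramiko.SSHClient()
client.load_system_host_keys()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(hostname='mumps.example.org', port=22,   # placeholder host
               username='user', password='secret')      # placeholder credentials

interact = SSHClientInteraction(client, timeout=10, display=False)
interact.expect('VISTA>')          # wait for the namespace prompt
interact.send('WRITE $ZVERSION')   # run a command at the prompt
interact.expect('VISTA>')
print(interact.current_output_clean)
client.close()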
Example #5
    def __init__(self,
                 config=None,
                 config_json=None,
                 config_fp=None,
                 config_dir=None,
                 areas=None,
                 peril_areas=None,
                 peril_areas_index=None,
                 peril_areas_index_props=None,
                 loc_to_global_areas_boundary_min_distance=0,
                 vulnerabilities=None,
                 loc_id_col='locnumber'):
        super(self.__class__, self).__init__(
            config=config,
            config_json=config_json,
            config_fp=config_fp,
            config_dir=config_dir,
        )

        loc_config = self.config.get('exposure') or self.config.get(
            'locations')
        self.loc_id_col = str.lower(str(
            loc_config.get('id_col') or loc_id_col))

        self.peril_lookup = OasisPerilLookup(
            config=self.config,
            config_dir=self.config_dir,
            areas=areas,
            peril_areas=peril_areas,
            peril_areas_index=peril_areas_index,
            peril_areas_index_props=peril_areas_index_props,
            loc_to_global_areas_boundary_min_distance=
            loc_to_global_areas_boundary_min_distance,
            loc_id_col=self.loc_id_col)

        self.peril_area_id_key = str(
            str(self.config['peril'].get('peril_area_id_col') or '')
            or 'peril_area_id').lower()

        # `or ''` must sit inside the inner str(): str(None) is the truthy
        # string 'None', so the default would otherwise never apply.
        self.vulnerability_id_key = str(
            str(self.config['vulnerability'].get('vulnerability_id_col') or '')
            or 'vulnerability_id').lower()

        self.vulnerability_lookup = OasisVulnerabilityLookup(
            config=self.config,
            config_dir=self.config_dir,
            vulnerabilities=vulnerabilities,
            loc_id_col=self.loc_id_col)
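
The difference between the peril_area_id_key and vulnerability_id_key lookups above is worth spelling out: str(None) is the truthy string 'None', so the `or ''` guard has to sit inside str() for the default after `or` to ever apply. A minimal sketch (the config dict here is made up):

config = {'peril': {'peril_area_id_col': None}}

# Wrong: str(None) == 'None' is truthy, so the default is never used.
bad = str(config['peril'].get('peril_area_id_col')) or 'peril_area_id'
assert bad == 'None'

# Right: collapse None/missing to '' first, then fall back to the default.
good = str(config['peril'].get('peril_area_id_col') or '') or 'peril_area_id'
assert good == 'peril_area_id'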
Example #6
    def __init__(
        self,
        config=None,
        config_json=None,
        config_fp=None,
        config_dir=None,
        vulnerabilities=None,
        loc_id_col='locnumber'
    ):
        super(self.__class__, self).__init__(config=config, config_json=config_json, config_fp=config_fp, config_dir=config_dir)

        if vulnerabilities or self.config.get('vulnerability'):
            self.col_dtypes, self.key_cols, self.vuln_id_col, self.vulnerabilities = self.get_vulnerabilities(vulnerabilities=vulnerabilities)

        if self.config.get('exposure') or self.config.get('locations'):
            # the guard accepts a 'locations'-only config, so don't index ['exposure'] directly
            loc_config = self.config.get('exposure') or self.config.get('locations')
            self.loc_id_col = str.lower(str(loc_config.get('id_col') or loc_id_col))
Example #7
def guess_collections(PossibleCollections, PossibleGenera, Genus):
    """By comparing the user-provided genus and iDigBio-scraped genera, the program guesses the correct collection"""
    BestGuess = None
    CollectionsChoice = None
    for i in range(
            len(PossibleGenera)
    ):  # take a guess at the correct collection based on which record matches the user-provided genus
        if PossibleGenera[i] == str.lower(Genus[0]):
            BestGuess = i
            print()
            print('Best guess of correct collection: ' +
                  PossibleCollections[i])
            # plain input(); evaluating the reply with eval() would raise a
            # NameError on a bare 'y'
            GoodGuess = input("Is this the correct collection? [y/n]")
            if GoodGuess == 'y':
                CollectionsChoice = PossibleCollections[BestGuess]
    # fall back to a manual choice when nothing matched or the guess was
    # rejected, so CollectionsChoice is always bound before the return
    if CollectionsChoice is None:
        print("No match found. Can't guess. Please choose a collection.")
        CollectionsChoice = user_choose_collection(PossibleCollections)
    return CollectionsChoice
Example #8
    def generate_peril_areas_rtree_file_index(
        self,
        keys_data_fp,
        areas_rtree_index_fp,
        lookup_config_fp=None,
        lookup_config=None,
    ):

        # Convert paths to absolute
        keys_data_fp = as_path(keys_data_fp, 'Lookup Data directory', is_dir=True, preexists=True)
        areas_rtree_index_fp = as_path(areas_rtree_index_fp, 'Index output file path', preexists=False)
        lookup_config_fp = as_path(lookup_config_fp, 'Built-in lookup config file path', preexists=True)

        if not (lookup_config or lookup_config_fp):
            raise OasisException('Either a built-in lookup config. or config. file path is required')

        config = get_json(src_fp=lookup_config_fp) if lookup_config_fp else lookup_config

        config_dir = os.path.dirname(lookup_config_fp) if lookup_config_fp else keys_data_fp

        peril_config = config.get('peril')

        if not peril_config:
            raise OasisException(
                'The lookup config must contain a peril-related subdictionary with a key named '
                '`peril` defining area-peril-related model information'
            )

        areas_fp = peril_config.get('file_path')

        if not areas_fp:
            raise OasisException(
                'The lookup peril config must define the path of a peril areas '
                '(or area peril) file with the key name `file_path`'
            )

        if areas_fp.startswith('%%KEYS_DATA_PATH%%'):
            areas_fp = areas_fp.replace('%%KEYS_DATA_PATH%%', keys_data_fp)

        if not os.path.isabs(areas_fp):
            areas_fp = os.path.join(config_dir, areas_fp)
            areas_fp = as_path(areas_fp, 'areas_fp')

        # `or ''` sits inside str() because str(None) is the truthy string
        # 'None', which would otherwise swallow the defaults after `or`.
        src_type = str.lower(str(peril_config.get('file_type') or '')) or 'csv'

        peril_id_col = str.lower(str(peril_config.get('peril_id_col') or '')) or 'peril_id'

        coverage_config = config.get('coverage')

        if not coverage_config:
            raise OasisException(
                'The lookup config must contain a coverage-related subdictionary with a key named '
                '`coverage` defining coverage related model information'
            )

        coverage_type_col = str.lower(str(coverage_config.get('coverage_type_col') or '')) or 'coverage_type'

        peril_area_id_col = str.lower(str(peril_config.get('peril_area_id_col') or '')) or 'area_peril_id'

        area_poly_coords_cols = peril_config.get('area_poly_coords_cols')

        if not area_poly_coords_cols:
            raise OasisException(
                'The lookup peril config must define the column names of '
                'the coordinates used to define areas in the peril areas '
                '(area peril) file using the key `area_poly_coords_cols`'
            )

        non_na_cols = (
            tuple(col.lower() for col in peril_config['non_na_cols']) if peril_config.get('non_na_cols')
            # list() is needed: a list cannot be concatenated with dict_values on Python 3
            else tuple(col.lower() for col in [peril_area_id_col] + list(area_poly_coords_cols.values()))
        )

        col_dtypes = peril_config.get('col_dtypes') or {peril_area_id_col: int}

        sort_cols = peril_config.get('sort_cols') or peril_area_id_col

        area_poly_coords_seq_start_idx = peril_config.get('area_poly_coords_seq_start_idx') or 1

        area_reg_poly_radius = peril_config.get('area_reg_poly_radius') or 0.00166

        index_props = peril_config.get('rtree_index') or {}
        index_props.pop('filename', None)  # tolerate a missing key; the explicit index path below is used instead

        return PerilAreasIndex.create_from_peril_areas_file(
            src_fp=areas_fp,
            src_type=src_type,
            peril_id_col=peril_id_col,
            coverage_type_col=coverage_type_col,
            peril_area_id_col=peril_area_id_col,
            non_na_cols=non_na_cols,
            col_dtypes=col_dtypes,
            sort_cols=sort_cols,
            area_poly_coords_cols=area_poly_coords_cols,
            area_poly_coords_seq_start_idx=area_poly_coords_seq_start_idx,
            area_reg_poly_radius=area_reg_poly_radius,
            index_fp=areas_rtree_index_fp,
            index_props=index_props
        )
Example #9
    def __init__(self,
                 areas=None,
                 config=None,
                 config_json=None,
                 config_fp=None,
                 config_dir=None,
                 loc_to_global_areas_boundary_min_distance=0,
                 peril_areas=None,
                 peril_areas_index=None,
                 peril_areas_index_fp=None,
                 peril_areas_index_props=None,
                 loc_id_col='locnumber'):
        super(self.__class__, self).__init__(config=config,
                                             config_json=config_json,
                                             config_fp=config_fp,
                                             config_dir=config_dir)

        peril_config = self.config.get('peril') or {}

        if areas or peril_areas or peril_config:
            if peril_areas_index:
                self.peril_areas_index = peril_areas_index
                self.peril_areas_index_props = self.peril_areas_index.properties.as_dict()
            elif (areas or peril_areas):
                self.index_props = (peril_areas_index_props
                                    or peril_config.get('rtree_index')
                                    or DEFAULT_RTREE_INDEX_PROPS)
                self.peril_areas_index = PerilAreasIndex(
                    areas=areas,
                    peril_areas=peril_areas,
                    properties=self.index_props)
            else:
                areas_rtree_index_config = peril_config.get(
                    'rtree_index') or {}
                index_fp = peril_areas_index_fp or areas_rtree_index_config.get(
                    'filename')

                # only resolve a relative path if we actually have one;
                # os.path.isabs(None) would raise a TypeError
                if index_fp and not os.path.isabs(index_fp):
                    index_fp = os.path.join(self.config_dir, index_fp)
                    index_fp = as_path(index_fp, 'index_fp', preexists=False)

                if index_fp:
                    idx_ext = areas_rtree_index_config.get(
                        'idx_extension') or 'idx'
                    dat_ext = areas_rtree_index_config.get(
                        'dat_extension') or 'dat'
                    if not (os.path.exists('{}.{}'.format(index_fp, idx_ext))
                            or os.path.exists('{}.{}'.format(
                                index_fp, dat_ext))):
                        raise OasisException(
                            'No Rtree file index {}.{{{}, {}}} found'.format(
                                index_fp, idx_ext, dat_ext))
                    self.peril_areas_index = PerilAreasIndex(fp=index_fp)
                    self.peril_areas_index_props = self.peril_areas_index.properties.as_dict(
                    )

            self.peril_areas_boundary = box(*self.peril_areas_index.bounds,
                                            ccw=False)

            _centroid = self.peril_areas_boundary.centroid
            self.peril_areas_centre = _centroid.x, _centroid.y

            self.loc_to_global_areas_boundary_min_distance = (
                loc_to_global_areas_boundary_min_distance
                or self.config['peril'].get(
                    'loc_to_global_areas_boundary_min_distance') or 0)

        if self.config.get('exposure') or self.config.get('locations'):
            # The guard accepts a 'locations'-only config, so don't index
            # self.config['exposure'] directly; `or` guards also have to sit
            # inside str(), because str(None) is the truthy string 'None'.
            loc_config = self.config.get('exposure') or self.config.get('locations')
            self.loc_id_col = str.lower(
                str(loc_config.get('id_col') or loc_id_col))
            self.loc_coords_x_col = str.lower(
                str(loc_config.get('coords_x_col') or 'lon'))
            self.loc_coords_y_col = str.lower(
                str(loc_config.get('coords_y_col') or 'lat'))
            self.loc_coords_x_bounds = tuple(
                loc_config.get('coords_x_bounds') or ()) or (-180, 180)
            self.loc_coords_y_bounds = tuple(
                loc_config.get('coords_y_bounds') or ()) or (-90, 90)
Example #10
def get_rs_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the RS Components HTML page for a part number and return the URL and parse tree.'''
            
    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'http://it.rs-online.com/web/c/?searchTerm=' + urlquote(pn + ' ' + extra_search_terms, safe='')

    elif url[0] == '/':
        url = 'http://it.rs-online.com' + url
    elif url.startswith('..'):
        url = 'http://it.rs-online.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            req = FakeBrowser(url)
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
    else: # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '', str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError
        
    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='specTableContainer') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('div', class_='srtnPageContainer') is not None:
        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find_all('tr', class_='resultRow')

            # Extract the product links for the part numbers from the table.
            product_links = []
            for p in products:
                try:
                    product_links.append(p.find('a', class_='primarySearchLink')['href'])
                    # For now, just take the first URL found in the list, i.e. don't
                    # choose the URL based on the stock type (e.g. single unit, reel, etc.).
                    return get_rs_part_html_tree(dist, pn, extra_search_terms, url=product_links[0], descend=descend-1)
                except AttributeError:
                    continue
                except TypeError:
                    #~ print('****************dist:',dist,'pn:**************************',pn)
                    continue
            
            

    #~ # If the tree is for a list of products, then examine the links to try to find the part number.
    #~ if tree.find('div', class_='srtnPageContainer') is not None:
        #~ if descend <= 0:
            #~ raise PartHtmlError
        #~ else:
            #~ # Look for the table of products.
            #~ products = tree.find('table',
                                 #~ class_='productLister',
                                 #~ id='sProdList').find_all('tr',
                                                          #~ class_='altRow')

            #~ # Extract the product links for the part numbers from the table.
            #~ product_links = []
            #~ for p in products:
                #~ try:
                    #~ product_links.append(
                        #~ p.find('td',
                               #~ class_='mftrPart').find('p',
                                                       #~ class_='wordBreak').a)
                #~ except AttributeError:
                    #~ continue

            #~ # Extract all the part numbers from the text portion of the links.
            #~ part_numbers = [l.text for l in product_links]

            #~ # Look for the part number in the list that most closely matches the requested part number.
            #~ match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            #~ # Now look for the link that goes with the closest matching part number.
            #~ for l in product_links:
                #~ if l.text == match:
                    #~ # Get the tree for the linked-to page and return that.
                    #~ return get_rs_part_html_tree(dist, pn, extra_search_terms,
                                #~ url=l['href'], descend=descend-1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
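
The retry loop above uses Python's for/else: the else branch runs only when the loop finishes without hitting break, i.e. when every attempt failed. A distilled sketch of the pattern (the function and exception names are stand-ins for the scraper's own):

import logging
import time

logger = logging.getLogger(__name__)

def fetch_with_retries(fetch, url, retries=3):
    for _ in range(retries):
        try:
            html = fetch(url)
            break  # success: leave the loop, skipping the else-branch
        except IOError:  # stand-in for WEB_SCRAPE_EXCEPTIONS
            logger.debug('retrying %s', url)
            time.sleep(1)  # crude backoff
    else:  # no break occurred: every attempt failed
        raise RuntimeError('no good read from {}'.format(url))
    return html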
Example #11
    def importieren(self, pfad = None, liste = None, ergaenzungsname = None, anzeigename_ergaenzen = False, nach_unten = False, force_gruppenname = None, force_scale = None, DBschema_erweitern = True):



        # The username that should be used
        if len(auth_user_global) > 0:    # is set
            auth_user = auth_user_global[0]
        else:
            auth_user = None


        self.iface.layerTreeView().setCurrentLayer(None)    # None corresponds to a null pointer -> the selection is removed -> nothing selected


        # Usually used when municipality-specific data is loaded,
        # for the sake of clarity
        self.anzeigename_aendern = anzeigename_ergaenzen
        self.gruppen_erg_name = ergaenzungsname     # the topmost group/layer is suffixed with this name!


        if not pfad:    # also catches the default None

            return

        # The QGIS project file is XML and is
        # read in here
        try:

            #pfad = 'd:/delme.qgs'
            #xml = file(pfad).read()
            #QtWidgets.QMessageBox.about(None, "Fehler", str(locale.getpreferredencoding()))
            project_file = open(pfad,'r',-1,'UTF8')
            xml = project_file.read()

            d = QtXml.QDomDocument()
            d.setContent(xml)

        except IOError:
            QtWidgets.QMessageBox.about(None, "Fehler", "QGIS Projektdatei " + pfad + " nicht gefunden!")
            return


        # Read the desired tag elements from the XML
        self.maps = d.elementsByTagName("maplayer")
        self.legends = d.elementsByTagName("legendlayer")
        self.gruppen = d.elementsByTagName("legendgroup")


        self.lyr = None
        self.joinlayerid = None

        #First read the current path QGIS is set to
        #(this may e.g. be a local path from which
        #a project is being loaded)
        CurrentPath = QgsProject.instance().fileName()


        #Then set it to the path being loaded from. Otherwise no project
        #with absolute paths can be saved (for layers loaded via the
        #VOGIS menu)
        QgsProject.instance().setFileName(pfad)


        #if this takes a while, show a small
        #info window for the user
        progressi = QtWidgets.QProgressDialog('Lade Daten','Abbrechen',0,self.maps.length())
        progressi.setFixedSize(350,90)
        btnCancel = QtWidgets.QPushButton()
        btnCancel.setText('Abbrechen')
        btnCancel.setFixedSize(70,30)
        progressi.setCancelButton(btnCancel)
        progressi.setWindowModality(1)


        #This loop walks all layers that appear in the legend, because that is
        #where the order in which they are drawn in QGIS is defined.
        #The loop is only needed to get the imported layers into
        #the right order in QGIS

        zaehler = 0 # counter for the number of loaded layers
        j = 0
        #for j in range(self.legends.length(),-1,-1):
        for j in range(self.legends.length()):


            # This loop walks all layers that appear in the maplayer tags,
            # because that is where the actual rendering information for
            # QGIS lives. So the layer is first selected by ID in the outer
            # loop and then identified in the maplayer tag in this loop
            # self.lyr=None


            for i in range(self.maps.length()):

                # Check whether the layer in question is not already loaded.
                # To do that, the VOGIS project import has to identify layers
                # via the layer-id tag. It must also be taken into account
                # whether the layer id was extended by the ergaenzungsname!!
                quelli = self.maps.item(i).namedItem("id").firstChild().toText().data()

                laden = True

                lyr_tmp = None

                for lyr_tmp in QgsProject.instance().mapLayers(): #walk all already-loaded layers -> dictionary
                    #QtWidgets.QMessageBox.about(None, "Fehler", str(lyr_tmp))
                    if (ergaenzungsname == None) and (lyr_tmp  == quelli):  #hit: the layer is already loaded
                        laden = False
                    if (ergaenzungsname != None) and (lyr_tmp  == quelli + ergaenzungsname): #hit: the layer is already loaded
                        laden = False


                #The layer id is identical in the legend tags and the maplayer
                #tags, so a layer can be identified exactly. If laden is also
                #True, we carry on

                if (self.maps.item(i).namedItem("id").firstChild().toText().data() == self.legends.item(j).namedItem("filegroup").namedItem("legendlayerfile").attributes().namedItem("layerid").nodeValue()) and laden:


            #ATTENTION: re-enable!!

                    # if only some of a project's layers are to be loaded;
                    # the list contains the names of those layers

                    if liste  != None:

                        brake_val = True
                        for nd in range(len(liste)):
                            if liste[nd] == self.legends.item(j).attributes().namedItem("name").nodeValue():
                                brake_val = False
                                break
                        if brake_val:
                            continue    # next layer, it is not on the list




                    # check whether this layer uses one or more join table(s)
                    self.joinlayerid = ''
                    for sj in range(self.maps.item(i).namedItem("vectorjoins").childNodes().length()):

                        # unfortunately we then have to walk everything again....
                        for lj in range(self.maps.length()):
                            if (self.maps.item(lj).namedItem("id").firstChild().toText().data() == self.maps.item(i).namedItem("vectorjoins").childNodes().item(sj).attributes().namedItem('joinLayerId').nodeValue()):
                                self.joinlayerid = self.maps.item(i).namedItem("vectorjoins").childNodes().item(sj).attributes().namedItem('joinLayerId').nodeValue()




                    #ATTENTION: the node value of the ID must be changed when
                    #loading per municipality (DKM). Since the layer IDs in the
                    #municipal QGIS projects are, for simplicity's sake, identical,
                    #QGIS would otherwise not import the layer!!!
                    #Just like the displayed layer name, the ID of the node is
                    #changed BEFORE loading so that QGIS takes it over!!
                    noddi = self.maps.item(i).namedItem("id")
                    if ergaenzungsname != None:
                            noddi.firstChild().setNodeValue(noddi.firstChild().nodeValue() + ergaenzungsname)




                    #Depending on the vogisini, the encoding is taken
                    #from the project file, from the CPG file, or
                    #is set to System
                    #if self.vogisEncoding == project, the project settings are used

                    base_name = os.path.dirname(pfad) + '/' + os.path.basename(self.maps.item(i).namedItem("datasource").firstChild().nodeValue())



                    # Attention: distinguish between absolute and relative paths

                    if len(os.path.dirname(self.maps.item(i).namedItem("datasource").firstChild().nodeValue())) < 2: # relative path in the QGIS project!
                        base_name = os.path.dirname(pfad) + '/' + os.path.basename(self.maps.item(i).namedItem("datasource").firstChild().nodeValue())

                    else:    # absolute path in the QGIS project!
                        base_name = self.maps.item(i).namedItem("datasource").firstChild().nodeValue()

                    if vogisEncoding_global[0] == 'menue':  # either use the CPG file or set System


                        try:   # is there a CPG file?
                            datei = open(os.path.splitext(base_name)[0] + '.cpg','r')
                            codierung_string = datei.read()
                            datei.close()
                            self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').setNodeValue(codierung_string)

                        except IOError: # assign the value System
                            self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').setNodeValue('System')

                    # be sure to DESELECT EVERYTHING, otherwise ordering problems

                    self.iface.layerTreeView().setCurrentLayer(None)    # None corresponds to a null pointer -> the selection is removed -> nothing selected


                    nv_ds = ''
                    nv_provider = ''
                    nv_encoding = ''

                    #############################################################################
                    # Switching the vector data over to the geodatabase - with conditions:
                    # no layer that already comes from a geodatabase may be mangled here
                    #############################################################################

                    if self.maps.item(i).attributes().namedItem('type').nodeValue() == 'vector' and vogisDb_global[0] != 'filesystem geodaten' and self.maps.item(i).namedItem("datasource").firstChild().nodeValue().find('host') < 0:

                        tablename = self.maps.item(i).namedItem("datasource").firstChild().nodeValue()

                        sql = ''
                        rc = []
                        db_ogr = ''

                        # check whether the layer is a shapefile data source
                        # and whether a subset is defined (a single lower-cased
                        # check suffices; upper-case variants can never match)

                        if tablename.find('.shp') > 0 and tablename.lower().find('subset') > 0:

                            rc = textfilter_subset(self.maps.item(i).namedItem("datasource").firstChild().nodeValue())
                            tablename = rc[0]
                            sql = rc[1]
                            db_ogr = rc[0]
                        else:

                            tablename = os.path.basename(self.maps.item(i).namedItem("datasource").firstChild().nodeValue()).split('.shp')[0]
                            db_ogr = tablename

                        if ergaenzungsname != None and DBschema_erweitern:
                            tablename = str.lower('"' + ergaenzungsname + '"."' + tablename + '"')

                        else:
                            tablename = str.lower('"vorarlberg"."' + tablename + '"')


                        # handle special characters (umlauts)!
                        # (the upper-case variants are dead after str.lower() above)
                        tablename = tablename.replace('ä', 'ae')
                        tablename = tablename.replace('Ä', 'Ae')
                        tablename = tablename.replace('ö', 'oe')
                        tablename = tablename.replace('Ö', 'Oe')
                        tablename = tablename.replace('ü', 'ue')
                        tablename = tablename.replace('Ü', 'Ue')
                        tablename = tablename.replace('ß', 'ss')
                        tablename = tablename.replace('. ', '_')


                        ################################################
                        # Determine the geometry column -- only works with OGR
                        param_list = str.split(vogisDb_global[0])

                        host = ''
                        dbname = ''
                        port = ''
                        for param in param_list:

                            if str.find(param, 'dbname') >= 0:
                                dbname = str.replace(param, 'dbname=', '')

                            elif str.find(param, 'host=') >= 0:
                                host = str.replace(param, 'host=', '')

                            elif str.find(param, 'port=') >= 0:
                                port = str.replace(param, 'port=', '')

                        try:
                            if auth_user == None:
                                outputdb = ogr.Open('pg: host=' + host + ' dbname=' + dbname + ' schemas=vorarlberg' + ' port=' + port)
                            else:
                                # [Reconstruction: from here to the fallback below the source
                                # page censored the code to '******'. Presumably ' user=' +
                                # auth_user was appended and the geometry column was then read
                                # from the OGR layer; only the default 'the_geom' is certain.]
                                outputdb = ogr.Open('pg: host=' + host + ' dbname=' + dbname + ' schemas=vorarlberg' + ' port=' + port + ' user=' + auth_user)
                            geom_column = outputdb.GetLayerByName(db_ogr).GetGeometryColumn() or 'the_geom'
                        except Exception:
                            geom_column = 'the_geom'
                        ##################################################
                        # End of geometry column



                        if self.maps.item(i).namedItem("datasource").firstChild().nodeValue().find('ogc_fid') > 0:

                            # Achtung, das Attribut user darf nicht zwingend immer nur klein sein -> Siehe Usermapping in der Doku
                            if auth_user == None:
                                dbpath = str.lower(vogisDb_global[0] + ' sslmode=disable table=' +  tablename +  ' (' + geom_column + ') sql') + sql
                            else:
                                dbpath = str.lower(vogisDb_global[0]) + ' user='******' sslmode=disable table=' +  tablename +  ' (' + geom_column + ') sql') + sql
                        else:
                            # Achtung, das Attribut user darf nicht zwingend immer nur klein sein -> Siehe Usermapping in der Doku
                            if auth_user == None:
                                dbpath = str.lower(vogisDb_global[0] + ' sslmode=disable key=ogc_fid table=' +  tablename +  ' (' + geom_column + ') sql') + sql
                            else:
                                dbpath = str.lower(vogisDb_global[0]) + ' user='******' sslmode=disable key=ogc_fid table=' +  tablename +  ' (' + geom_column + ') sql') + sql

                        nv_ds = self.maps.item(i).namedItem("datasource").firstChild().nodeValue()
                        nv_provider = self.maps.item(i).namedItem("provider").firstChild().nodeValue()
                        nv_encoding = self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').nodeValue()

                        self.maps.item(i).namedItem("datasource").firstChild().setNodeValue(dbpath)
                        self.maps.item(i).namedItem("provider").firstChild().setNodeValue('postgres')
                        self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').setNodeValue('UTF-8')


                    if os.path.abspath(os.path.dirname(__file__)) != path_global[0]:
                        return


                    # read in the layer!
                    proj_read = QgsProject.instance().readLayer(self.maps.item(i))
                    # the progress bar
                    progressi.setValue(j)
                    progressi.forceShow()
                    if progressi.wasCanceled():
                        break

                    #QtGui.QMessageBox.about(None, "Achtung", str(proj_read))
                    if not proj_read and vogisDb_global[0] == 'filesystem geodaten': # this is where the layer is loaded and, according to the
                                                                                     # entries of the DOM node, also rendered and displayed
                        QtWidgets.QMessageBox.about(None, "Achtung", "Layer " + self.legends.item(j).attributes().namedItem("name").nodeValue() + " nicht gefunden!")
                        continue
                    elif not proj_read and vogisDb_global[0] != 'filesystem geodaten':   # try switching to the file system
                        QtWidgets.QMessageBox.about(None, "Achtung", "Layer - " + self.legends.item(j).attributes().namedItem("name").nodeValue() + " - in der Datenbank nicht gefunden - es wird aufs Filesystem umgeschaltet")
                        self.maps.item(i).namedItem("datasource").firstChild().setNodeValue(nv_ds)
                        self.maps.item(i).namedItem("provider").firstChild().setNodeValue(nv_provider)
                        # restore the original encoding as well
                        self.maps.item(i).namedItem("provider").attributes().namedItem('encoding').setNodeValue(nv_encoding)

                        if not  QgsProject.instance().readLayer(self.maps.item(i)): #still not found; we give up
                            QtWidgets.QMessageBox.about(None, "Achtung", "Layer " + self.legends.item(j).attributes().namedItem("name").nodeValue() + " nicht gefunden!")
                            continue


                    # also change the display name in QGIS;
                    # to do that, first select the right layer via the layer id
                    # leginterface = self.iface.legendInterface()

                    #for lyr_tmp in leginterface.layers():
                    for lyr_tmp in QgsProject.instance().mapLayers(): #walk all already-loaded layers -> dictionary
                        if lyr_tmp == noddi.firstChild().nodeValue():
                            self.lyr = QgsProject.instance().mapLayers()[lyr_tmp]
                            if force_scale != None:
                                # NOTE: the passed force_scale value is ignored here; the scale is hard-coded to 25000
                                self.lyr.setMaximumScale(25000)
                                self.lyr.setScaleBasedVisibility(True)


                    #Depending on the vogisini, the CRS is taken from
                    #the project file or from the *.prj file

                    if vogisKBS_global[0] == 'menue':
                        #Get the coordinate reference system from the prj file,
                        #if present, and assign it from there (overriding the project setting)
                        try:
                            datei = open(os.path.splitext(self.lyr.source())[0] + '.prj','r')
                            bezugssystem_string = datei.read()
                            #if it is not a clean EPSG string, assign our 31254 ourselves
                            if (re.search(r'MGI\D+Austria\D+GK\D+West', bezugssystem_string, re.I)) != None:  #ArcGIS does not produce a clean EPSG string
                                bezugssystem_crs = QgsCoordinateReferenceSystem()
                                bezugssystem_crs.createFromSrid(31254)
                            else:
                                bezugssystem_crs = QgsCoordinateReferenceSystem(bezugssystem_string)

                            datei.close()

                            self.lyr.setCrs(bezugssystem_crs)

                        except IOError:
                            pass

                    #then register it in the application
                    #QgsMapLayerRegistry.instance().addMapLayer(self.lyr)



                    # Joined tables need special handling: the join info is read
                    # out, the join is removed, and only once everything has been
                    # loaded is it recreated. Otherwise there can be problems!
                    # Any number of layers with any number of joins is supported.
                    # This is a layer with at least one registered join

                    single_lyr_join = lyr_join()    # instantiate our own struct object

                    if not self.joinlayerid == '':  # check whether at least one join is registered for this layer

                        single_lyr_join.joinlayer = self.lyr
                        single_lyr_join.joininfo = self.lyr.vectorJoins()
                        self.joinliste.append(single_lyr_join)  # keep a list of join layers and their join info


                        for rem_join in self.lyr.vectorJoins(): # remove the joins for the join layer - there may be several
                            kasperle = rem_join.joinLayerId
                            self.lyr.removeJoin(str(rem_join.joinLayerId))




                    #And now also extend the layer name shown in QGIS.
                    #See above, for municipality-wise loading
                    if (ergaenzungsname != None) and (self.lyr != None) and self.anzeigename_aendern: # one more boolean because of the water-management data!!
                        if not (self.lyr.name().find(ergaenzungsname) > -1):    # ATTENTION: otherwise the name gets appended again on repeated clicks
                            if self.lyr.name().find("(a)") > -1:

                                # note: rstrip strips the characters '(', 'a', ')' individually, not the literal suffix
                                aktname = str.strip((self.lyr.name().rstrip("(a)"))) + "-" + ergaenzungsname + " (a)"
                                self.lyr.setName(aktname)

                            else:
                                aktname = str.strip(self.lyr.name()) + "-" + ergaenzungsname
                                self.lyr.setName(aktname)


                    # finally check whether the layer is activated
                    if (self.legends.item(j).attributes().namedItem("checked").nodeValue() == "Qt::Unchecked") and not (self.lyr is None):

                        #leginterface.setLayerVisible(self.lyr,False)
                        lyr_tree = QgsProject.instance().layerTreeRoot().findLayer(self.lyr)
                        lyr_tree.setItemVisibilityChecked(False)


                    index = QgsProject.instance().layerTreeRoot()
                    zwetsch =QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id())

                    dummy = zwetsch.clone()


                    # Layers that are loaded later must also sit further
                    # down in the legend (ordering);
                    # this is controlled via the zaehler variable
                    # QGIS above 2.6

                    index_ins = index_zuweisen(self.legends.item(j).attributes().namedItem("name").nodeValue(),self.legends.item(j).parentNode())
                    index.insertChildNode(-1,dummy)
                    zaehler = zaehler + 1
                    zwetsch.parent().removeChildNode(zwetsch)

                    # otherwise there are ordering problems when groups and
                    # layers are mixed at the top level
                    if not (self.legends.item(j).parentNode().nodeName() == "legendgroup") and (force_gruppenname is None):

                        zwetsch =QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id())
                        dummy = zwetsch.clone()
                        index.insertChildNode(index_ins,dummy)
                        zwetsch.parent().removeChildNode(zwetsch)


                    #finally check whether the layer is expanded
                    #and set the flag
                    if (self.legends.item(j).attributes().namedItem("open").nodeValue() == "false") and not (self.lyr is None):
                        dummy.setExpanded(False)

                    elif (self.legends.item(j).attributes().namedItem("open").nodeValue() == "true") and not (self.lyr is None):
                        dummy.setExpanded(True)

                    # we could stop here if the layers were simply loaded
                    # WITHOUT being placed into group layers
                    # continue


                    #######################################################
                    # here begins the part of the program that handles
                    # group layers - either when they are defined in the
                    # project file or simply when requested via the menu
                    #######################################################
                    if (self.legends.item(j).parentNode().nodeName() == "legendgroup") or not (force_gruppenname is None):

                        self.gruppe_vorhanden = False

                        #ATTENTION: the layer name and the directly enclosing
                        #group name must differ, otherwise an error occurs.
                        #Should it happen anyway, it is caught here

                        if self.legends.item(j).parentNode().attributes().namedItem("name").nodeValue() == self.legends.item(j).attributes().namedItem("name").nodeValue():

                            aktname =  self.lyr.name()
                            self.lyr.setName(aktname+"_")

                        #check whether the group has already been created
                        grp_name = self.legends.item(j).parentNode().attributes().namedItem("name").nodeValue() #name of the group from the QGS project file
                        grp_obj = QgsProject.instance().layerTreeRoot().findGroup(grp_name)
                        if (isinstance(grp_obj,QgsLayerTreeGroup)) and (not (grp_obj is None)):
                            self.gruppe_vorhanden = True

                        grp_name = force_gruppenname #the name was passed in
                        grp_obj = QgsProject.instance().layerTreeRoot().findGroup(grp_name)
                        if (isinstance(grp_obj,QgsLayerTreeGroup))  and (not (grp_obj is None)):

                            self.gruppe_vorhanden = True



                        #########################################################
                        # group layers from the project file
                        #########################################################
                        if self.legends.item(j).parentNode().attributes().namedItem("name").nodeValue() != "" and self.legends.item(j).parentNode().nodeName() == "legendgroup":

                            QgsLayerTreeRegistryBridge(QgsProject.instance().layerTreeRoot(),QgsProject.instance())
                            kind = self.legends.item(j).parentNode()

                            gruppen_hierarchie = pos_gruppe()

                            gruppen_liste = []

                            while  (kind.nodeName() == "legendgroup"):

                                gruppen_hierarchie.name = kind.attributes().namedItem("name").nodeValue()                                       # the name of the group directly above the layer: level
                                gruppen_hierarchie.index  = index_zuweisen(kind.attributes().namedItem("name").nodeValue(),kind.parentNode())   # index of the group's display order within its hierarchy
                                gruppen_hierarchie.ex = kind.attributes().namedItem("open").nodeValue()
                                gruppen_hierarchie.ch = kind.attributes().namedItem("checked").nodeValue()
                                gruppen_liste.append(copy.deepcopy(gruppen_hierarchie)) # ATTENTION: reference!!
                                kind = kind.parentNode()

                            # grp contains the qtreewidgetitem object of the group
                            # into which the loaded layer is to be moved!
                            grp  = sublayer(QgsProject.instance().layerTreeRoot(),gruppen_liste, self.gruppen_erg_name, nach_unten, anzeigename_ergaenzen)[0] #should there be several, always take only the first - see the explanation at the sub itself
                            zwtsch = QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id())
                            dummy = zwtsch.clone()



                            if not (isinstance(grp,QgsLayerTreeGroup)) or grp is None:
                                QtWidgets.QMessageBox.about(None, "ACHTUNG","Anlegen der Gruppe gescheitert")
                                break

                            index_layer  = index_zuweisen(self.legends.item(j).attributes().namedItem("name").nodeValue(),self.legends.item(j).parentNode())

                            # QtGui.QMessageBox.about(None, "LayeriD", str(dummy.layerId()))
                            grp.insertChildNode(index_layer,dummy)
                            zwtsch.parent().removeChildNode(zwtsch) # remove the twin!


                        ##########################################################
                        # here ends the group-layers-from-project-file part!!
                        #########################################################

                        letzterplatz = False #flag variable: does the group belong at the very bottom?

                        #the group the layer is embedded in does not come from
                        #a project but is forced; here there is only one level
                        #(which makes the whole thing easier)

                        if (not force_gruppenname is None):

                            # create the group
                            gruppen_hierarchie = pos_gruppe()
                            gruppen_hierarchie.name = force_gruppenname

                            # grp = sublayer(QgsProject.instance().layerTreeRoot(),leginterface,[gruppen_hierarchie])[0]
                            grp = sublayer(QgsProject.instance().layerTreeRoot(),[gruppen_hierarchie])[0]

                            zwtsch = QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id()) #the loaded layer
                            dummy = zwtsch.clone()

                            # how many layers are already in the group?
                            # baum = QgsLayerTreeModel(grp)
                            # anzahl_top_level_eintraege = baum.rowCount()
                            baum = grp.findLayers()
                            anzahl_top_level_eintraege = len(baum)
                            baum = None # otherwise a crash at grp.parent().removeChildNode(grp), because baum would reference a nothing!


                            # insert the new one at the very end
                            grp.insertChildNode(anzahl_top_level_eintraege,dummy)
                            zwtsch.parent().removeChildNode(zwtsch)
                            grp.setExpanded(False)

                            if nach_unten:   # move the filled group all the way down if the flag is set

                                if not self.gruppe_vorhanden:

                                    dummy = grp.clone()
                                    QgsProject.instance().layerTreeRoot().insertChildNode(-1,dummy)
                                    grp.parent().removeChildNode(grp)


                    else:   # the layers are NOT loaded into a self.gruppenlayer
                            # but should be moved down

                        if nach_unten:


                            # how many layers are already present at the top level?
                            baum = QgsLayerTreeModel(QgsProject.instance().layerTreeRoot())
                            anzahl_top_level_eintraege = baum.rowCount()
                            baum = None # otherwise a crash at grp.parent().removeChildNode(grp), because baum would reference a nothing!


                            zwtsch = QgsProject.instance().layerTreeRoot().findLayer(self.lyr.id()) #the loaded layer
                            dummy = zwtsch.clone()

                            # insert the new one at the very end
                            QgsProject.instance().layerTreeRoot().insertChildNode(anzahl_top_level_eintraege,dummy)
                            zwtsch.parent().removeChildNode(zwtsch)


                    # finally check whether the layer is expanded and set the
                    # flag - moving it into the group layers sometimes
                    # changes this...

                    if (self.legends.item(j).attributes().namedItem("open").nodeValue() == "false") and not (self.lyr is None):

                        dummy.setExpanded(False)


                    elif (self.legends.item(j).attributes().namedItem("open").nodeValue() == "true") and not (self.lyr is None):

                        dummy.setExpanded(True)


                    # the following code forces a refresh of the legend and
                    # the map window; otherwise there can be rendering
                    # problems in the map window! Why??

                    if not self.lyr is None:

                        anzeigename = self.lyr.name()
                        self.lyr.setName(anzeigename+" ")
                        self.lyr.setName(anzeigename)

                    else:

                        QtWidgets.QMessageBox.about(None, "Achtung", "Layer " + self.legends.item(j).attributes().namedItem("name").nodeValue() + " nicht gefunden!")


                    # be sure to DEselect everything, otherwise ordering problems
                    self.iface.layerTreeView().setCurrentLayer(None)    # None corresponds to a null pointer -> the selection is removed -> nothing selected

                    #Must be reset, otherwise repeated loading of the same
                    #project file can fail: namely when the loop starts again,
                    #does not load, and self.lyr still points at the
                    #previous load!

                    self.lyr = None

                    # and on with the loop!


        # Be SURE to reset QGIS to the original
        # path at the end

        QgsProject.instance().setFileName(CurrentPath)


        #ATTENTION: for some reason there are problems with the group layers: if
        # a layer inside a group created this way is selected, the next load
        # fails. The topmost legend entry therefore MUST be set as the current
        # layer before any loading!!!


        #better to delete the objects explicitly
        self.legends = None
        self.legendTree = None
        self.maps = None
        self.gruppen = None


        ######################################################################
        # Final check: is everything there?
        # Check whether all layers on the list were loaded. This is necessary
        # because project loading can go fine while a layer is nevertheless
        # missing
        ######################################################################
        fehler = 0
        layerzaehler = 0


        # away with the progress bar
        # self.info.close()


        if liste  != None:  #if only some of a project's layers were loaded; the
                            #list contains the names of those layers

            for nd in range(len(liste)):

                for lyr_tmp_id in QgsProject.instance().mapLayers(): #walk all already-loaded layers -> dictionary

                    lyr_tmp = QgsProject.instance().mapLayer(lyr_tmp_id)

                    # be sure to take the optional change of the
                    # display name (e.g. DKM) into account!

                    if (ergaenzungsname != None) and self.anzeigename_aendern:
                        if  liste[nd] + "-" + ergaenzungsname == lyr_tmp.name():
                            layerzaehler = layerzaehler +1
                        elif  liste[nd].rstrip(" (a)") + "-" + ergaenzungsname + ' (a)' == lyr_tmp.name():
                            layerzaehler = layerzaehler +1

                    else:

                        if liste[nd] == lyr_tmp.name():
                            layerzaehler = layerzaehler +1



        # ATTENTION: if not all layers named in the list (the layer-name list
        # passed in for importing) were found in the project by layer name,
        # one more error message is emitted here
        if not liste is None:
            if len(liste) > layerzaehler: #ints! then something was not loaded
                QtWidgets.QMessageBox.about(None, "Achtung", "Nicht alle Layer aus " + pfad + " konnte(n) geladen werden!!")



        # restore the joined relations,
        # but only at the very end!!

        for singlejoin in self.joinliste:
            for singlejoininfo in singlejoin.joininfo:
                singlejoin.joinlayer.addJoin(singlejoininfo)
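
The chain of tablename.replace() calls in the importer above can be expressed in one pass with str.translate; a behavior-equivalent sketch of the umlaut normalization (the sample input and function name are made up):

# One-pass transliteration table for the German special characters handled above.
UMLAUT_MAP = str.maketrans({
    'ä': 'ae', 'Ä': 'Ae',
    'ö': 'oe', 'Ö': 'Oe',
    'ü': 'ue', 'Ü': 'Ue',
    'ß': 'ss',
})

def normalize_tablename(name):
    """Transliterate umlauts and replace '. ' as in the importer above."""
    return name.translate(UMLAUT_MAP).replace('. ', '_')

print(normalize_tablename('"vorarlberg"."gemeindestraßen"'))
# -> '"vorarlberg"."gemeindestrassen"'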
Example #12
def get_digikey_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the Digikey HTML page for a part number and return the URL and parse tree.'''

    def merge_price_tiers(main_tree, alt_tree):
        '''Merge the price tiers from the alternate-packaging tree into the main tree.'''
        try:
            insertion_point = main_tree.find('table', id='product-dollars').find('tr')
            for tr in alt_tree.find('table', id='product-dollars').find_all('tr'):
                insertion_point.insert_after(tr)
        except AttributeError:
            logger.log(DEBUG_OBSESSIVE, 'Problem merging price tiers for Digikey part {} with alternate packaging!'.format(pn))

    def merge_qty_avail(main_tree, alt_tree):
        '''Merge the quantities from the alternate-packaging tree into the main tree.'''
        try:
            main_qty = get_digikey_qty_avail(main_tree)
            alt_qty = get_digikey_qty_avail(alt_tree)
            if main_qty is None:
                merged_qty = alt_qty
            elif alt_qty is None:
                merged_qty = main_qty
            else:
                merged_qty = max(main_qty, alt_qty)
            if merged_qty is not None:
                insertion_point = main_tree.find('td', id='quantityAvailable').find('span', id='dkQty')
                insertion_point.string = '{}'.format(merged_qty)
        except AttributeError:
            logger.log(DEBUG_OBSESSIVE, 'Problem merging available quantities for Digikey part {} with alternate packaging!'.format(pn))

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'http://www.digikey.com/scripts/DkSearch/dksus.dll?WT.z_header=search_go&lang=en&keywords=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')
        #url = 'http://www.digikey.com/product-search/en?KeyWords=' + urlquote(pn,safe='') + '&WT.z_header=search_go'
    elif url[0] == '/':
        url = 'http://www.digikey.com' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    req = FakeBrowser(url)
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))

    else: # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '', str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    # Use the following code if Javascript challenge pages are used to block scrapers.
    # try:
    # ghst = Ghost()
    # sess = ghst.start(plugins_enabled=False, download_images=False, show_scrollbars=False, javascript_enabled=False)
    # html, resources = sess.open(url)
    # print('type of HTML is {}'.format(type(html.content)))
    # html = html.content
    # except Exception as e:
    # print('Exception reading with Ghost: {}'.format(e))

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then return it.
    if tree.find('div', class_='product-top-section') is not None:

        # Digikey separates cut-tape and reel packaging, so we need to examine more pages
        # to get all the pricing info. But don't descend any further if limit has been reached.
        if descend > 0:
            try:
                # Find all the URLs to alternate-packaging pages for this part.
                ap_urls = [
                    ap.find('li', class_='lnkAltPack').find_all('a')[-1].get('href')
                    for ap in tree.find(
                        'div', class_='bota',
                        id='additionalPackaging').find_all(
                            'ul', class_='more-expander-item')
                ]
                logger.log(DEBUG_OBSESSIVE,'Found {} alternate packagings for {} from {}'.format(len(ap_urls), pn, dist))
                ap_trees_and_urls = []  # Initialize as empty in case no alternate packagings are found.
                try:
                    ap_trees_and_urls = [get_digikey_part_html_tree(dist, pn, 
                                     extra_search_terms, ap_url, descend=0)
                                     for ap_url in ap_urls]
                except Exception:
                    logger.log(DEBUG_OBSESSIVE,'Failed to find alternate packagings for {} from {}'.format(pn, dist))

                # Put the main tree on the list as well and then look through
                # the entire list for one that's non-reeled. Use this as the
                # main page for the part.
                ap_trees_and_urls.append((tree, url))
                if digikey_part_is_reeled(tree):
                    for ap_tree, ap_url in ap_trees_and_urls:
                        if not digikey_part_is_reeled(ap_tree):
                            # Found a non-reeled part, so use it as the main page.
                            tree = ap_tree
                            url = ap_url
                            break  # Done looking.

                # Now go through the other pages, merging their pricing and quantity
                # info into the main page.
                for ap_tree, ap_url in ap_trees_and_urls:
                    if ap_tree is tree:
                        continue  # Skip examining the main tree. It already contains its info.
                    try:
                        # Merge the pricing info from that into the main parse tree to make
                        # a single, unified set of price tiers...
                        merge_price_tiers(tree, ap_tree)
                        # and merge available quantity, using the maximum found.
                        merge_qty_avail(tree, ap_tree)
                    except AttributeError:
                        logger.log(DEBUG_OBSESSIVE,'Problem merging price/qty for {} from {}'.format(pn, dist))
                        continue
            except AttributeError:
                logger.log(DEBUG_OBSESSIVE,'Problem parsing URLs from product page for {} from {}'.format(pn, dist))

        return tree, url  # Return the parse tree and the URL where it came from.

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', id='productTable') is not None:
        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find(
                'table',
                id='productTable').find('tbody').find_all('tr')

            # Extract the product links for the part numbers from the table.
            # Extract links for both manufacturer and catalog numbers.
            product_links = [p.find('td',
                                    class_='tr-mfgPartNumber').a
                             for p in products]
            product_links.extend([p.find('td',
                                    class_='tr-dkPartNumber').a
                             for p in products])

            # Extract all the part numbers from the text portion of the links.
            part_numbers = [l.text for l in product_links]

            # Look for the part number in the list that most closely matches the requested part number.
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            # Now look for the link that goes with the closest matching part number.
            for l in product_links:
                if l.text == match:
                    # Get the tree for the linked-to page and return that.
                    logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
                    return get_digikey_part_html_tree(dist, pn, extra_search_terms,
                                                      url=l['href'],
                                                      descend=descend - 1)

    # If the HTML contains a list of part categories, then give up.
    if tree.find('form', id='keywordSearchForm') is not None:
        logger.log(DEBUG_OBSESSIVE,'Found high-level part categories for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
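The alphanumeric-only containment test above recurs in every scraper in this file. A small self-contained demonstration of what the normalization does (part numbers are illustrative):

import re

def normalize(s):
    '''Keep only letters and digits, lower-cased, for fuzzy containment tests.'''
    return re.sub(r'[\W_]', '', str.lower(s))

assert normalize('LM358-N/NOPB') == 'lm358nnopb'
assert normalize('LM358') in normalize('<td>LM358-N/NOPB</td>')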
Beispiel #14
0
def get_part_html_tree(dist,
                       pn,
                       extra_search_terms='',
                       url=None,
                       descend=2,
                       local_part_html=None,
                       scrape_retries=2):
    '''Find the RS Components HTML page for a part number and return the URL and parse tree.'''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'http://it.rs-online.com/web/c/?searchTerm=' + urlquote(
            pn + ' ' + extra_search_terms, safe='')

    elif url[0] == '/':
        url = 'http://it.rs-online.com' + url
    elif url.startswith('..'):
        url = 'http://it.rs-online.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    for _ in range(scrape_retries):
        try:
            req = FakeBrowser(url)
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(
                DEBUG_DETAILED,
                'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '',
              str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE,
                   'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='specTableContainer') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('div', class_='srtnPageContainer') is not None:
        logger.log(DEBUG_OBSESSIVE,
                   'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,
                       'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find_all('tr', class_='resultRow')

            # Extract the product links for the part numbers from the table.
            product_links = []
            for p in products:
                try:
                    link = p.find('a', class_='primarySearchLink').get('href')
                    if link is not None:
                        product_links.append(link)
                        # Up to now get the first url found in the list. i.e. do not choose the url based on the stock type (e.g. single unit, reel etc.)
                        return get_part_html_tree(
                            dist,
                            pn,
                            extra_search_terms,
                            url=product_links[0],
                            descend=descend - 1,
                            scrape_retries=scrape_retries)
                except AttributeError:
                    continue
                except TypeError:
                    continue


    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,
               'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
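The scrape loop above relies on Python's for/else: the else branch runs only when the loop finishes without hitting break, i.e. when every retry failed. A minimal sketch of the idiom with a hypothetical fetch callable:

def fetch_with_retries(fetch, retries=2):
    '''Return fetch()'s result, retrying on IOError; fetch is any callable.'''
    for _ in range(retries):
        try:
            result = fetch()
            break  # success: skip the else clause below
        except IOError:
            continue  # failed attempt: try again
    else:  # reached only if the loop never executed `break`
        raise RuntimeError('all {} attempts failed'.format(retries))
    return result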
Beispiel #15
0
    def dist_get_part_html_tree(self,
                                pn,
                                extra_search_terms='',
                                url=None,
                                descend=2):
        '''@brief Find the farnell HTML page for a part number and return the URL and parse tree.
           @param pn Part number `str()`.
           @param extra_search_terms
           @param url
           @param descend
           @return (html `str()` of the page, url)
        '''

        # Use the part number to lookup the part using the site search function, unless a starting url was given.
        if url is None:
            url = 'http://it.farnell.com/Search?storeId=10165&catalogId=15001&categoryName=&selectedCategoryId=&langId=-4&categoryIdBox=&st=' \
                + urlquote(pn, safe='')
            if extra_search_terms:
                url = url + urlquote(' ' + extra_search_terms, safe='')
        elif url[0] == '/':
            url = 'http://www.farnell.com' + url
        elif url.startswith('..'):
            url = 'http://www.farnell.com/Search/' + url

        # Open the URL, read the HTML from it, and parse it into a tree structure.
        try:
            html = self.browser.scrape_URL(url)
        except Exception:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No HTML page for {} from {}'.format(pn, self.name))
            raise PartHtmlError

        # Abort if the part number isn't in the HTML somewhere.
        # (Only use the numbers and letters to compare PN to HTML.)
        if re.sub(r'[\W_]',
                  '', str.lower(pn)) not in re.sub(r'[\W_]', '',
                                                   str.lower(str(html))):
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No part number {} in HTML page from {}'.format(pn, self.name))
            raise PartHtmlError

        try:
            tree = BeautifulSoup(html, 'lxml')
        except Exception:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No HTML tree for {} from {}'.format(pn, self.name))
            raise PartHtmlError

        # If the tree contains the tag for a product page, then just return it.
        if tree.find('div', class_='productDisplay', id='page') is not None:
            return tree, url

        # If the tree is for a list of products, then examine the links to try to find the part number.
        if tree.find('table', class_='productLister',
                     id='sProdList') is not None:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'Found product table for {} from {}'.format(pn, self.name))
            if descend <= 0:
                self.logger.log(
                    DEBUG_OBSESSIVE,
                    'Passed descent limit for {} from {}'.format(
                        pn, self.name))
                raise PartHtmlError
            else:
                # Look for the table of products.
                products = tree.find('table', class_='productLister').find_all(
                    'tr', class_='altRow')

                # Extract the product links for the part numbers from the table.
                product_links = []
                for p in products:
                    try:
                        product_links.append(
                            p.find('td', class_='mftrPart').find('a'))
                    except AttributeError:
                        continue

                # Extract all the part numbers from the text portion of the links.
                part_numbers = [l.text for l in product_links]

                # Look for the part number in the list that most closely matches the requested part number.
                match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

                # Now look for the link that goes with the closest matching part number.
                for l in product_links:
                    if l.text == match:
                        # Get the tree for the linked-to page and return that.
                        self.logger.log(
                            DEBUG_OBSESSIVE,
                            'Selecting {} from product table for {} from {}'.
                            format(l.text.strip(), pn, self.name))
                        return self.dist_get_part_html_tree(
                            pn,
                            extra_search_terms,
                            url=l.get('href', ''),
                            descend=descend - 1)

        # I don't know what happened here, so give up.
        self.logger.log(DEBUG_OBSESSIVE,
                        'Unknown error for {} from {}'.format(pn, self.name))
        self.logger.log(DEBUG_HTTP_RESPONSES, 'Response was %s' % html)
        raise PartHtmlError
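The product-table branch above selects a candidate with difflib.get_close_matches using a cutoff of 0.0, which always returns at least one entry for a non-empty candidate list; that is what makes the trailing [0] index safe. For example:

import difflib

part_numbers = ['LM358N', 'LM358AN', 'LM324N']
match = difflib.get_close_matches('LM358', part_numbers, 1, 0.0)[0]
assert match == 'LM358N'  # the highest-ratio candidate comes first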
Beispiel #16
0
def getXYT(xyt_filename, match_only=False, rebuild=False, filepath=None):
    # Read in a .fits or .npz file containing the output of the RHT.
    # If match_only is given, and a dictionary of Keys:
    #     This will return whether ALL keys are found in the data of the given file
    # Else:
    #     This will return the image coordinates of significant linearity, and the theta power spectrum at those coords.
    #     This will return as two integer arrays of some_length, and an ntheta*some_length array of theta power

    if not os.path.isfile(xyt_filename):
        # Fast Failure Case - This file does not exist.
        if match_only:
            return False
        else:
            raise ValueError(
                'Input xyt_filename in getXYT matches no existing file')
    else:
        # Attempts to extract header information for Matching, or else the data itself
        if xyt_filename.endswith('.npz'):
            # Allows very large files to be read in.
            data = np.load(xyt_filename, mmap_mode='r')
            if match_only:
                try:
                    return all([
                        match_only[x] == data[str.lower(x)]
                        for x in list(match_only.keys())
                    ])
                except KeyError:
                    return False
            Hi = data['hi']
            Hj = data['hj']
            Hthets = data['hthets']

        elif xyt_filename.endswith('.fits'):
            hdu_list = fits.open(
                xyt_filename,
                mode='readonly',
                memmap=True,
                save_backup=False,
                checksum=True)  #Allows for reading in very large files!
            header = hdu_list[0].header
            if match_only:
                try:
                    return all([
                        match_only[x] == header[str.upper(x)]
                        for x in list(match_only.keys())
                    ])
                except KeyError:
                    return False
            data = hdu_list[1].data
            Hi = data['hi']
            Hj = data['hj']
            Hthets = data['hthets']

        else:
            raise ValueError(
                'Supported input types in getXYT include .npz and .fits only')

    # Format the output properly.
    if rebuild and filepath is not None:
        # Can recreate an entire 3D array of mostly 0s.
        data = getData(filepath)
        datay, datax = data.shape
        ntheta = Hthets[0].shape[0]
        if BUFFER:
            xyt = np.memmap(tempfile.TemporaryFile(),
                            dtype=DTYPE,
                            mode='w+',
                            shape=(datay, datax, ntheta))
            xyt.fill(0.0)
        else:
            print(
                'Warning: Reconstructing very large array in memory! Set BUFFER to True!'
            )
            xyt = np.zeros((datay, datax, ntheta))
        coords = list(zip(Hj, Hi))
        for c in range(len(coords)):
            j, i = coords[c]
            xyt[j, i, :] = Hthets[c]
        return xyt
    else:
        # Returns the sparse, memory mapped form only.
        return Hi, Hj, Hthets
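getXYT() expects 'hi', 'hj' and 'hthets' arrays plus lower-cased header keys in the .npz case. A hedged sketch of a writer producing that layout; the function name and header handling are assumptions, not part of the original RHT code:

import numpy as np

def putXYT_npz(xyt_filename, hi, hj, hthets, **header):
    '''Write sparse RHT output in the layout getXYT() reads (illustrative).'''
    # Header keys are stored lower-cased, mirroring data[str.lower(x)] above.
    np.savez(xyt_filename, hi=hi, hj=hj, hthets=hthets,
             **{str.lower(k): v for k, v in header.items()})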
Beispiel #17
0
def get_digikey_part_html_tree(dist,
                               pn,
                               extra_search_terms='',
                               url=None,
                               descend=2,
                               local_part_html=None):
    '''Find the Digikey HTML page for a part number and return the URL and parse tree.'''
    def merge_price_tiers(main_tree, alt_tree):
        '''Merge the price tiers from the alternate-packaging tree into the main tree.'''
        try:
            insertion_point = main_tree.find('table',
                                             id='product-dollars').find('tr')
            for tr in alt_tree.find('table',
                                    id='product-dollars').find_all('tr'):
                insertion_point.insert_after(tr)
        except AttributeError:
            logger.log(
                DEBUG_OBSESSIVE,
                'Problem merging price tiers for Digikey part {} with alternate packaging!'
                .format(pn))

    def merge_qty_avail(main_tree, alt_tree):
        '''Merge the quantities from the alternate-packaging tree into the main tree.'''
        try:
            main_qty = get_digikey_qty_avail(main_tree)
            alt_qty = get_digikey_qty_avail(alt_tree)
            if main_qty is None:
                merged_qty = alt_qty
            elif alt_qty is None:
                merged_qty = main_qty
            else:
                merged_qty = max(main_qty, alt_qty)
            if merged_qty is not None:
                insertion_point = main_tree.find(
                    'td', id='quantityAvailable').find('span', id='dkQty')
                insertion_point.string = '{}'.format(merged_qty)
        except AttributeError:
            logger.log(
                DEBUG_OBSESSIVE,
                'Problem merging available quantities for Digikey part {} with alternate packaging!'
                .format(pn))

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'http://www.digikey.com/scripts/DkSearch/dksus.dll?WT.z_header=search_go&lang=en&keywords=' + urlquote(
            pn + ' ' + extra_search_terms, safe='')
        #url = 'http://www.digikey.com/product-search/en?KeyWords=' + urlquote(pn,safe='') + '&WT.z_header=search_go'
    elif url[0] == '/':
        url = 'http://www.digikey.com' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    req = FakeBrowser(url)
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(
                DEBUG_DETAILED,
                'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '',
              str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE,
                   'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    # Use the following code if Javascript challenge pages are used to block scrapers.
    # try:
    # ghst = Ghost()
    # sess = ghst.start(plugins_enabled=False, download_images=False, show_scrollbars=False, javascript_enabled=False)
    # html, resources = sess.open(url)
    # print('type of HTML is {}'.format(type(html.content)))
    # html = html.content
    # except Exception as e:
    # print('Exception reading with Ghost: {}'.format(e))

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then return it.
    if tree.find('div', class_='product-top-section') is not None:

        # Digikey separates cut-tape and reel packaging, so we need to examine more pages
        # to get all the pricing info. But don't descend any further if limit has been reached.
        if descend > 0:
            try:
                # Find all the URLs to alternate-packaging pages for this part.
                ap_urls = [
                    ap.find('li',
                            class_='lnkAltPack').find_all('a')[-1].get('href')
                    for ap in tree.find(
                        'div', class_='bota', id='additionalPackaging').
                    find_all('ul', class_='more-expander-item')
                ]
                logger.log(
                    DEBUG_OBSESSIVE,
                    'Found {} alternate packagings for {} from {}'.format(
                        len(ap_urls), pn, dist))
                ap_trees_and_urls = []  # Initialize as empty in case no alternate packagings are found.
                try:
                    ap_trees_and_urls = [
                        get_digikey_part_html_tree(dist,
                                                   pn,
                                                   extra_search_terms,
                                                   ap_url,
                                                   descend=0)
                        for ap_url in ap_urls
                    ]
                except Exception:
                    logger.log(
                        DEBUG_OBSESSIVE,
                        'Failed to find alternate packagings for {} from {}'.
                        format(pn, dist))

                # Put the main tree on the list as well and then look through
                # the entire list for one that's non-reeled. Use this as the
                # main page for the part.
                ap_trees_and_urls.append((tree, url))
                if digikey_part_is_reeled(tree):
                    for ap_tree, ap_url in ap_trees_and_urls:
                        if not digikey_part_is_reeled(ap_tree):
                            # Found a non-reeled part, so use it as the main page.
                            tree = ap_tree
                            url = ap_url
                            break  # Done looking.

                # Now go through the other pages, merging their pricing and quantity
                # info into the main page.
                for ap_tree, ap_url in ap_trees_and_urls:
                    if ap_tree is tree:
                        continue  # Skip examining the main tree. It already contains its info.
                    try:
                        # Merge the pricing info from that into the main parse tree to make
                        # a single, unified set of price tiers...
                        merge_price_tiers(tree, ap_tree)
                        # and merge available quantity, using the maximum found.
                        merge_qty_avail(tree, ap_tree)
                    except AttributeError:
                        logger.log(
                            DEBUG_OBSESSIVE,
                            'Problem merging price/qty for {} from {}'.format(
                                pn, dist))
                        continue
            except AttributeError:
                logger.log(
                    DEBUG_OBSESSIVE,
                    'Problem parsing URLs from product page for {} from {}'.
                    format(pn, dist))
        return tree, url  # Return the parse tree and the URL where it came from.

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', id='productTable') is not None:
        logger.log(DEBUG_OBSESSIVE,
                   'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,
                       'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find(
                'table', id='productTable').find('tbody').find_all('tr')

            # Extract the product links for the part numbers from the table.
            # Extract links for both manufacturer and catalog numbers.
            product_links = [
                p.find('td', class_='tr-mfgPartNumber').a for p in products
            ]
            product_links.extend(
                [p.find('td', class_='tr-dkPartNumber').a for p in products])

            # Extract all the part numbers from the text portion of the links.
            part_numbers = [l.text for l in product_links]

            # Look for the part number in the list that most closely matches the requested part number.
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            # Now look for the link that goes with the closest matching part number.
            for l in product_links:
                if l.text == match:
                    # Get the tree for the linked-to page and return that.
                    logger.log(
                        DEBUG_OBSESSIVE,
                        'Selecting {} from product table for {} from {}'.
                        format(l.text, pn, dist))
                    return get_digikey_part_html_tree(dist,
                                                      pn,
                                                      extra_search_terms,
                                                      url=l['href'],
                                                      descend=descend - 1)

    # If the HTML contains a list of part categories, then give up.
    if tree.find('form', id='keywordSearchForm') is not None:
        logger.log(
            DEBUG_OBSESSIVE,
            'Found high-level part categories for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,
               'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
Beispiel #18
0
def get_farnell_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the farnell HTML page for a part number and return the URL and parse tree.'''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
#        url = 'http://www.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15003&langId=-1&storeId=10194&gs=true&st=' + urlquote(
#            pn + ' ' + extra_search_terms,
#            safe='')
        url = 'http://it.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15001&langId=-4&storeId=10165&gs=true&st=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')

    elif url[0] == '/':
        url = 'http://www.farnell.com' + url
    elif url.startswith('..'):
        url = 'http://www.farnell.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            req = FakeBrowser(url)
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
    else: # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]','',str.lower(pn)) not in re.sub(r'[\W_]','',str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='productDisplay', id='page') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', class_='productLister', id='sProdList') is not None:
        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find('table',
                                 class_='productLister',
                                 id='sProdList').find_all('tr',
                                                          class_='altRow')

            # Extract the product links for the part numbers from the table.
            product_links = []
            for p in products:
                try:
                    product_links.append(
                        p.find('td',
                               class_='mftrPart').find('p',
                                                       class_='wordBreak').a)
                except AttributeError:
                    continue

            # Extract all the part numbers from the text portion of the links.
            part_numbers = [l.text for l in product_links]

            # Look for the part number in the list that most closely matches the requested part number.
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            # Now look for the link that goes with the closest matching part number.
            for l in product_links:
                if l.text == match:
                    # Get the tree for the linked-to page and return that.
                    logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
                    return get_farnell_part_html_tree(dist, pn, extra_search_terms,
                                url=l['href'], descend=descend-1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
Beispiel #19
0
def direk_laden(PGdb, lyr_name, shapename, pfad, iface, subset=None):


    # The username that is supposed to be used
    if len(auth_user_global) > 0:    # is set
        auth_user = auth_user_global[0]
    else:
        auth_user = None


    iface.layerTreeView().setCurrentLayer(None)  # So that we start at the very top of the legend!

    try:
        db = PGdb
        shapename_ohne_suffix = shapename.replace('.shp','')
        shapename_ohne_suffix = str(str.strip(str.lower(shapename_ohne_suffix)))
        if db is not None:

            try:  # geodatabase

                ################################################
                # Determine the geometry column -- only possible via OGR
                try:
                    if auth_user is None:
                        outputdb = ogr.Open('pg: host =' + db.hostName() + ' dbname =' + db.databaseName() + ' schemas=' + schema + ' port=' + str(db.port()))
                    else:
                        outputdb = ogr.Open('pg: host =' + db.hostName() + ' dbname =' + db.databaseName() + ' schemas=' + schema + ' port=' + str(db.port()) + ' user=' + auth_user)
                    # Look up the geometry column of the table via OGR.
                    geom_column = outputdb.GetLayerByName(shapename_ohne_suffix).GetGeometryColumn()
                except Exception:
                    geom_column = 'the_geom'  # fall back to the usual default

                ################################################


                # load the data
                uri = QgsDataSourceUri()
                uri.setConnection(db.hostName(), str(db.port()), db.databaseName(), '', '')

                if auth_user is not None:
                    uri.setUsername(auth_user)
                uri.setDataSource('vorarlberg', shapename_ohne_suffix, geom_column)
                erg_lyr = QgsVectorLayer(uri.uri(), lyr_name, "postgres")

                # check whether loading succeeded
                if not erg_lyr.isValid():   # not successful
                    QtWidgets.QMessageBox.about(None, "Error", "Layer " + shapename_ohne_suffix + " not found in the database - switching to the file system")
                    erg_lyr = QgsVectorLayer(pfad + '/' + shapename, lyr_name, "ogr")

            except Exception:  # even worse
                QtWidgets.QMessageBox.about(None, "Error", "Layer " + shapename_ohne_suffix + " not found in the database - switching to the file system")
                erg_lyr = QgsVectorLayer(pfad + '/' + shapename, lyr_name, "ogr")

        else:
            erg_lyr = QgsVectorLayer(pfad + '/' + shapename, lyr_name, "ogr")



        # Apply the attribute-based subset here
        if subset is not None:
            erg_lyr.setSubsetString(subset)

        # check whether something sensible could be loaded

        if erg_lyr.isValid():
            return erg_lyr

        else:
            QtWidgets.QMessageBox.about(None, "Error", "Layer " + shapename + " could not be loaded")

            return None



    except Exception:

        return None
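The database path above boils down to three QgsDataSourceUri calls. A minimal, self-contained sketch with placeholder connection values (host, database and table names are hypothetical; the 'vorarlberg' schema is taken from the code above):

from qgis.core import QgsDataSourceUri, QgsVectorLayer

uri = QgsDataSourceUri()
# host, port (as a string), database, user, password
uri.setConnection('localhost', '5432', 'gisdb', '', '')
# schema, table, geometry column
uri.setDataSource('vorarlberg', 'beispiel_tabelle', 'the_geom')
lyr = QgsVectorLayer(uri.uri(), 'Beispiel', 'postgres')
if not lyr.isValid():
    print('layer could not be loaded')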
Beispiel #20
0
def get_part_html_tree(dist,
                       pn,
                       extra_search_terms='',
                       url=None,
                       descend=2,
                       local_part_html=None):
    '''Find the TME HTML page for a part number and return the URL and parse tree.'''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'https://www.tme.eu/en/katalog/?search=' + urlquote(
            pn + ' ' + extra_search_terms, safe='')
    elif url[0] == '/':
        url = 'https://www.tme.eu' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    req = FakeBrowser(url)
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(
                DEBUG_DETAILED,
                'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '',
              str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(
            DEBUG_OBSESSIVE,
            'No part number {} in HTML page from {} ({})'.format(
                pn, dist, url))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', id='ph') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', id="products") is not None:
        logger.log(DEBUG_OBSESSIVE,
                   'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,
                       'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find('table', id="products").find_all(
                'tr', class_=('product-row'))

            # Extract the product links for the part numbers from the table.
            product_links = []
            for p in products:
                for a in p.find('td', class_='product').find_all('a'):
                    product_links.append(a)

            # Extract all the part numbers from the text portion of the links.
            part_numbers = [l.text for l in product_links]

            # Look for the part number in the list that most closely matches the requested part number.
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            # Now look for the link that goes with the closest matching part number.
            for l in product_links:
                if (not l['href'].startswith('./katalog')) and l.text == match:
                    # Get the tree for the linked-to page and return that.
                    logger.log(
                        DEBUG_OBSESSIVE,
                        'Selecting {} from product table for {} from {}'.
                        format(l.text, pn, dist))
                    # TODO: The current implementation does up to four HTTP
                    # requests per part (search, part details page for TME P/N,
                    # XHR for pricing information, and XHR for stock
                    # availability). This is mainly for the compatibility with
                    # other distributor implementations (html_tree gets passed
                    # to all functions).
                    # A modified implementation (which would pass JSON data
                    # obtained by the XHR instead of the HTML DOM tree) might be
                    # able to do the same with just two requests (search for TME
                    # P/N, XHR for pricing and stock availability).
                    return get_part_html_tree(dist,
                                              pn,
                                              extra_search_terms,
                                              url=l['href'],
                                              descend=descend - 1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,
               'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
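The search URLs in these scrapers are all built the same way: urlquote with safe='' percent-encodes every reserved character, including spaces. Assuming urlquote is urllib.parse.quote, the usual alias in this code base:

from urllib.parse import quote as urlquote

pn = 'LM358'
extra_search_terms = 'dip8'
url = 'https://www.tme.eu/en/katalog/?search=' + urlquote(
    pn + ' ' + extra_search_terms, safe='')
assert url == 'https://www.tme.eu/en/katalog/?search=LM358%20dip8'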
Beispiel #21
0
def get_farnell_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the farnell HTML page for a part number and return the URL and parse tree.'''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
#        url = 'http://www.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15003&langId=-1&storeId=10194&gs=true&st=' + urlquote(
#            pn + ' ' + extra_search_terms,
#            safe='')
        url = 'http://it.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15001&langId=-4&storeId=10165&gs=true&st=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')

    elif url[0] == '/':
        url = 'http://www.farnell.com' + url
    elif url.startswith('..'):
        url = 'http://www.farnell.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            req = FakeBrowser(url)
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
    else: # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]','',str.lower(pn)) not in re.sub(r'[\W_]','',str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='productDisplay', id='page') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', class_='productLister', id='sProdList') is not None:
        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find('table',
                                 class_='productLister',
                                 id='sProdList').find_all('tr',
                                                          class_='altRow')

            # Extract the product links for the part numbers from the table.
            product_links = []
            for p in products:
                try:
                    product_links.append(p.find('td', class_='mftrPart').find('a'))
                except AttributeError:
                    continue

            # Extract all the part numbers from the text portion of the links.
            part_numbers = [l.text for l in product_links]

            # Look for the part number in the list that most closely matches the requested part number.
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            # Now look for the link that goes with the closest matching part number.
            for l in product_links:
                if l.text == match:
                    # Get the tree for the linked-to page and return that.
                    logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
                    return get_farnell_part_html_tree(dist, pn, extra_search_terms,
                                url=l['href'], descend=descend-1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
Beispiel #22
0
    def dist_get_part_html_tree(self,
                                pn,
                                extra_search_terms='',
                                url=None,
                                descend=2):
        '''@brief Find the RS Components HTML page for a part number and return the URL and parse tree.
           @param pn Part number `str()`.
           @param extra_search_terms
           @param url
           @param descend
           @return (html `str()` of the page, url)
        '''

        # Use the part number to lookup the part using the site search function, unless a starting url was given.
        if url is None:
            url = 'http://it.rs-online.com/web/c/?searchTerm=' + urlquote(
                pn, safe='')
            if extra_search_terms:
                url = url + urlquote(' ' + extra_search_terms, safe='')
        elif url[0] == '/':
            url = 'http://it.rs-online.com' + url
        elif url.startswith('..'):
            url = 'http://it.rs-online.com/Search/' + url

        # Open the URL, read the HTML from it, and parse it into a tree structure.
        try:
            html = self.browser.scrape_URL(url)
        except Exception:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No HTML page for {} from {}'.format(pn, self.name))
            raise PartHtmlError

        try:
            tree = BeautifulSoup(html, 'lxml')
        except Exception:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No HTML tree for {} from {}'.format(pn, self.name))
            raise PartHtmlError

        # Abort if the part number isn't in the HTML somewhere.
        # (Only use the numbers and letters to compare PN to HTML.)
        if re.sub(r'[\W_]',
                  '', str.lower(pn)) not in re.sub(r'[\W_]', '',
                                                   str.lower(str(html))):
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No part number {} in HTML page from {}'.format(pn, self.name))
            raise PartHtmlError

        # If the tree contains the tag for a product page, then just return it.
        if tree.find('div', class_='advLineLevelContainer'):
            return tree, url

        # If the tree is for a list of products, then examine the links to try to find the part number.
        if tree.find('div', class_=('resultsTable',
                                    'results-table-container')) is not None:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'Found product table for {} from {}'.format(pn, self.name))
            if descend <= 0:
                self.logger.log(
                    DEBUG_OBSESSIVE,
                    'Passed descent limit for {} from {}'.format(
                        pn, self.name))
                raise PartHtmlError
            else:
                # Look for the table of products.
                products = tree.find('table', id='results-table').find_all(
                    'tr', class_='resultRow')

                # Extract the product links for the part numbers from the table.
                product_links = [
                    p.find('a', class_='product-name').get('href')
                    for p in products
                ]

                # Extract all the part numbers from the text portion of the links.
                part_numbers = [
                    p.find('span', class_='text-contents').get_text()
                    for p in products
                ]

                # Look for the part number in the list that most closely matches the requested part number.
                match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

                # Now look for the link that goes with the closest matching part number.
                for i in range(len(product_links)):
                    if part_numbers[i] == match:
                        # Get the tree for the linked-to page and return that.
                        self.logger.log(
                            DEBUG_OBSESSIVE,
                            'Selecting {} from product table for {} from {}'.
                            format(part_numbers[i], pn, self.name))
                        return self.dist_get_part_html_tree(
                            pn,
                            extra_search_terms,
                            url=product_links[i],
                            descend=descend - 1)

        # I don't know what happened here, so give up.
        self.logger.log(DEBUG_OBSESSIVE,
                        'Unknown error for {} from {}'.format(pn, self.name))
        self.logger.log(DEBUG_HTTP_RESPONSES, 'Response was %s' % html)
        raise PartHtmlError
Beispiel #23
0
def get_part_html_tree(dist,
                       pn,
                       extra_search_terms='',
                       url=None,
                       descend=2,
                       local_part_html=None,
                       scrape_retries=2):
    '''@brief Find the Mouser HTML page for a part number and return the URL and parse tree.
       @param dist
       @param pn Part number `str()`.
       @param extra_search_terms
       @param url
       @param descend
       @param local_part_html
       @param scrape_retries `int` Quantity of retries in case of fail.
       @return (html `str()` of the page, url)
    '''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'https://www.mouser.com/Search/Refine.aspx?Keyword=' + urlquote(
            pn + ' ' + extra_search_terms, safe='')
    elif url[0] == '/':
        url = 'https://www.mouser.com' + url
    elif url.startswith('..'):
        url = 'https://www.mouser.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    req = FakeBrowser(url)
    req.add_header('Cookie', 'preferences=ps=www2&pl=en-US&pc_www2=USDe')
    for _ in range(scrape_retries):
        try:
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(
                DEBUG_DETAILED,
                'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '',
              str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE,
                   'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', id='pdpPricingAvailability') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('div', id='searchResultsTbl') is not None:
        logger.log(DEBUG_OBSESSIVE,
                   'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,
                       'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find('table',
                                 class_='SearchResultsTable').find_all(
                                     'tr',
                                     class_=('SearchResultsRowOdd',
                                             'SearchResultsRowEven'))

            # Extract the product links for the part numbers from the table.
            product_links = [
                p.find('div', class_='mfrDiv').a for p in products
            ]

            # Extract all the part numbers from the text portion of the links.
            part_numbers = [l.text for l in product_links]

            # Look for the part number in the list that most closely matches the requested part number.
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            # Now look for the link that goes with the closest matching part number.
            for l in product_links:
                if l.text == match:
                    # Get the tree for the linked-to page and return that.
                    logger.log(
                        DEBUG_OBSESSIVE,
                        'Selecting {} from product table for {} from {}'.
                        format(l.text, pn, dist))
                    return get_part_html_tree(dist,
                                              pn,
                                              extra_search_terms,
                                              url=l.get('href', ''),
                                              descend=descend - 1,
                                              scrape_retries=scrape_retries)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,
               'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
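
Aside: the closest-match step above calls `difflib.get_close_matches` with a cutoff of 0.0, which lets every candidate qualify, so indexing the result with `[0]` is safe whenever the results table is non-empty (an empty table would raise IndexError). A minimal, self-contained sketch of that step, with made-up part numbers:

import difflib

# Hypothetical part numbers scraped from a search-results table.
part_numbers = ['LM358N', 'LM358DR', 'LM324N']

# cutoff=0.0 lets every candidate through; matches come back sorted
# best-first, so [0] is the closest part number.
match = difflib.get_close_matches('LM358', part_numbers, 1, 0.0)[0]
print(match)  # -> 'LM358N'
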
Example #24
0
    def dist_get_part_html_tree(self,
                                pn,
                                extra_search_terms='',
                                url=None,
                                descend=2):
        '''@brief Find the Mouser HTML page for a part number and return its parse tree and URL.
           @param pn Part number `str()`.
           @param extra_search_terms Extra keywords appended to the search query.
           @param url Starting URL; if `None`, one is built from the part number.
           @param descend Levels of search-result links left to follow.
           @return (BeautifulSoup tree of the page, url)
        '''

        # Use the part number to lookup the part using the site search function, unless a starting url was given.
        if url is None:
            url = distributor_dict[self.name]['site']['url'] + \
                '/Search/Refine.aspx?Keyword=' + urlquote(pn, safe='')
            if extra_search_terms:
                url = url + urlquote(' ' + extra_search_terms, safe='')
        elif url[0] == '/':
            url = distributor_dict[self.name]['site']['url'] + url
        elif url.startswith('..'):
            url = distributor_dict[self.name]['site']['url'] + '/Search/' + url

        # Open the URL, read the HTML from it, and parse it into a tree structure.
        try:
            html = self.browser.scrape_URL(url)
        except Exception as ex:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No HTML page for {} from {}: {}'.format(pn, self.name, ex))
            raise PartHtmlError

        # Abort if the part number isn't in the HTML somewhere.
        # (Only use the numbers and letters to compare PN to HTML.)
        if (re.sub(r'[\W_]', '', str.lower(pn))
                not in re.sub(r'[\W_]', '', str.lower(str(html)))):
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No part number {} in HTML page from {}'.format(pn, self.name))
            raise PartHtmlError

        try:
            tree = BeautifulSoup(html, 'lxml')
        except Exception:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'No HTML tree for {} from {}'.format(pn, self.name))
            raise PartHtmlError

        # If the tree contains the tag for a product page, then just return it.
        if tree.find('div', id='pdpPricingAvailability') is not None:
            return tree, url

        # If the tree is for a list of products, then examine the links to try to find the part number.
        if tree.find('div', id='searchResultsTbl') is not None:
            self.logger.log(
                DEBUG_OBSESSIVE,
                'Found product table for {} from {}'.format(pn, self.name))
            if descend <= 0:
                self.logger.log(
                    DEBUG_OBSESSIVE,
                    'Passed descent limit for {} from {}'.format(
                        pn, self.name))
                raise PartHtmlError
            else:
                # Look for the table of products.
                products = tree.find('table',
                                     class_='SearchResultsTable').find_all(
                                         'tr',
                                         class_=('SearchResultsRowOdd',
                                                 'SearchResultsRowEven'))

                # Extract the product links for the part numbers from the table.
                product_links = [
                    p.find('div', class_='mfrDiv').a for p in products
                ]

                # Extract all the part numbers from the text portion of the links.
                part_numbers = [l.text for l in product_links]

                # Look for the part number in the list that most closely matches the requested part number.
                match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

                # Now look for the link that goes with the closest matching part number.
                for l in product_links:
                    if l.text == match:
                        # Get the tree for the linked-to page and return that.
                        self.logger.log(
                            DEBUG_OBSESSIVE,
                            'Selecting {} from product table for {} from {}'.
                            format(l.text, pn, self.name))
                        return self.dist_get_part_html_tree(
                            pn,
                            extra_search_terms,
                            url=l.get('href', ''),
                            descend=descend - 1)

        # I don't know what happened here, so give up.
        self.logger.log(DEBUG_OBSESSIVE,
                        'Unknown error for {} from {}'.format(pn, self.name))
        self.logger.log(DEBUG_HTTP_RESPONSES, 'Response was %s' % html)
        raise PartHtmlError
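
Aside: the part-number sanity check in these scrapers squashes both the part number and the page down to lower-case letters and digits before comparing, so punctuation differences (dashes, slashes, underscores) cannot cause false negatives. A standalone sketch of that comparison; the helper name `pn_in_html` is invented for illustration:

import re

def pn_in_html(pn, html):
    '''True if pn occurs in html, ignoring case, punctuation, and underscores.'''
    def squash(s):
        return re.sub(r'[\W_]', '', str.lower(s))
    return squash(pn) in squash(html)

print(pn_in_html('LM358-N/NOPB', '<td>lm358n nopb</td>'))  # -> True
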
Example #25
0
def get_part_html_tree(dist,
                       pn,
                       extra_search_terms='',
                       url=None,
                       descend=2,
                       local_part_html=None,
                       scrape_retries=2):
    '''@brief Find the Farnell HTML page for a part number and return its parse tree and URL.
       @param dist Distributor name `str()` used in log messages.
       @param pn Part number `str()`.
       @param extra_search_terms Extra keywords appended to the search query.
       @param url Starting URL; if `None`, one is built from the part number.
       @param descend Levels of search-result links left to follow.
       @param local_part_html Unused by this scraper.
       @param scrape_retries `int` Number of retries in case of failure.
       @return (BeautifulSoup tree of the page, url)
    '''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        # Previous www.farnell.com endpoint, kept for reference:
        # url = 'http://www.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15003&langId=-1&storeId=10194&gs=true&st=' + urlquote(
        #     pn + ' ' + extra_search_terms, safe='')
        url = 'http://it.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15001&langId=-4&storeId=10165&gs=true&st=' + urlquote(
            pn + ' ' + extra_search_terms, safe='')

    elif url[0] == '/':
        url = 'http://www.farnell.com' + url
    elif url.startswith('..'):
        url = 'http://www.farnell.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    try:
        html = fake_browser(url, scrape_retries)
    except Exception:
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if (re.sub(r'[\W_]', '', str.lower(pn))
            not in re.sub(r'[\W_]', '', str.lower(str(html)))):
        logger.log(DEBUG_OBSESSIVE,
                   'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='productDisplay', id='page') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', class_='productLister', id='sProdList') is not None:
        logger.log(DEBUG_OBSESSIVE,
                   'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,
                       'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find('table',
                                 class_='productLister',
                                 id='sProdList').find_all('tr',
                                                          class_='altRow')

            # Extract the product links for the part numbers from the table.
            product_links = []
            for p in products:
                try:
                    product_links.append(
                        p.find('td', class_='mftrPart').find('a'))
                except AttributeError:
                    continue

            # Extract all the part numbers from the text portion of the links.
            part_numbers = [l.text for l in product_links]

            # Look for the part number in the list that most closely matches the requested part number.
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            # Now look for the link that goes with the closest matching part number.
            for l in product_links:
                if l.text == match:
                    # Get the tree for the linked-to page and return that.
                    logger.log(
                        DEBUG_OBSESSIVE,
                        'Selecting {} from product table for {} from {}'.
                        format(l.text.strip(), pn, dist))
                    return get_part_html_tree(dist,
                                              pn,
                                              extra_search_terms,
                                              url=l.get('href', ''),
                                              descend=descend - 1,
                                              scrape_retries=scrape_retries)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,
               'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
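
Aside: all of these scrapers share the same link-normalization rule for URLs pulled out of result tables. A sketch of just that rule; the helper name `absolutize` is invented for illustration:

def absolutize(url, site='http://www.farnell.com'):
    '''Server-relative links get the site prefix; '..'-style links are
       resolved against /Search/; anything else is already absolute.'''
    if url.startswith('/'):
        return site + url
    if url.startswith('..'):
        return site + '/Search/' + url
    return url

print(absolutize('/webapp/wcs/stores/servlet/ProductDisplay'))
# -> http://www.farnell.com/webapp/wcs/stores/servlet/ProductDisplay
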
Example #26
0
def get_mouser_part_html_tree(dist, pn, extra_search_terms='', url=None,
                              descend=2, local_part_html=None):
    '''Find the Mouser HTML page for a part number and return the URL and parse tree.'''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'http://www.mouser.com/Search/Refine.aspx?Keyword=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')
    elif url[0] == '/':
        url = 'http://www.mouser.com' + url
    elif url.startswith('..'):
        url = 'http://www.mouser.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    req = FakeBrowser(url)
    req.add_header('Cookie', 'preferences=ps=www2&pl=en-US&pc_www2=USDe')
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED,
                       'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if (re.sub(r'[\W_]', '', str.lower(pn))
            not in re.sub(r'[\W_]', '', str.lower(str(html)))):
        logger.log(DEBUG_OBSESSIVE,
                   'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE,
                   'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', id='product-details') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', class_='SearchResultsTable') is not None:
        logger.log(DEBUG_OBSESSIVE,
                   'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE,
                       'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError
        else:
            # Look for the table of products.
            products = tree.find(
                'table',
                class_='SearchResultsTable').find_all(
                    'tr',
                    class_=('SearchResultsRowOdd', 'SearchResultsRowEven'))

            # Extract the product links for the part numbers from the table.
            product_links = [p.find('div', class_='mfrDiv').a for p in products]

            # Extract all the part numbers from the text portion of the links.
            part_numbers = [l.text for l in product_links]

            # Look for the part number in the list that most closely matches the requested part number.
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]

            # Now look for the link that goes with the closest matching part number.
            for l in product_links:
                if l.text == match:
                    # Get the tree for the linked-to page and return that.
                    logger.log(DEBUG_OBSESSIVE,
                               'Selecting {} from product table for {} from {}'.format(
                                   l.text, pn, dist))
                    return get_mouser_part_html_tree(dist, pn, extra_search_terms,
                                                     url=l.get('href', ''),
                                                     descend=descend - 1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE,
               'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
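
Aside: the retry loop in this example uses Python's for/else: the else branch runs only when the loop finishes without hitting break, i.e. only when every attempt failed. A self-contained sketch of that pattern using the standard library; the function name `fetch` and the plain urllib calls stand in for the FakeBrowser/WEB_SCRAPE_EXCEPTIONS machinery above:

from urllib.request import Request, urlopen
from urllib.error import URLError

def fetch(url, retries=2):
    '''Return page bytes, retrying on network errors.'''
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    for _ in range(retries):
        try:
            html = urlopen(req).read()
            break  # Got a good read; the else clause is skipped.
        except URLError:
            continue
    else:  # Every attempt raised, so break was never reached.
        raise RuntimeError('no good response from {}'.format(url))
    return html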