Esempio n. 1
0
    def get_idxurl(self, gsoup, rurl, urle):
        """
        Recover the list of element URLs inside the gallery index, based
        on the root domain URL and HTML div tags.

        Args:
            gsoup (bs-obj): BeautifulSoup object containing the gallery's
            element list
            rurl (str): root URL of the domain to complete the element url
            urle (str): HTML <div> keyword to process the Page's scraped
            gallery urls

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): list with each of the gallery's unique urls
        """
        try:
            # the gallery model performs the actual URL recovery
            return self.gallery.get_idxurl(gsoup, rurl, urle)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: get_idxurl")
Esempio n. 2
0
    def get_idxtitle(self, gsoup, etitle):
        """
        Recover the element titles from the gallery main page.

        Args:
            gsoup (bs-obj): BeautifulSoup object containing the gallery's
            element list
            etitle (str): HTML <div> keyword to process the scraped data
            from the gallery's soup to get the element titles

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): gallery element (paints) titles in string
        """
        try:
            # the gallery model performs the actual title extraction
            return self.gallery.get_idxtitle(gsoup, etitle)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: get_idxtitle")
Esempio n. 3
0
    def scrapidx(self, gurl, stime, div, attrs):
        """
        Scrap the gallery, create a new index and recover all elements
        in it.

        Args:
            gurl (str): URL for the gallery to scrap data
            stime (float): waiting time between requests
            div (str): HTML <div> keyword to search and scrap
            attrs (dict): decorative attributes in the <div> keyword to
            refine the search and scrap

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (bs-obj): div and attrs filtered beatifulsoup object
        """
        try:
            # the gallery model performs the actual index scraping
            return self.gallery.scrapidx(gurl, stime, div, attrs)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: scrapidx")
Esempio n. 4
0
    def findin(self, division, attributes=None, multiple=True):
        """
        Find HTML tags inside the page's BeautifulSoup body.

        Args:
            division (str): HTML tag to find in soup ie.: "div", or "li"
            attributes (dict, optional): decorators to highlight the divs
            options. Defaults to None.
            multiple (bool, optional): True to find multiple tag
            occurrences in the HTML, False for just the first one.
            Defaults to True.

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (bs-obj): filtered BeautifulSoup object; None when the
            multiple flag is not a boolean
        """
        try:
            # a non boolean flag answers None, as before
            if not isinstance(multiple, bool):
                return None

            # pick the soup search method matching the flag
            finder = self.sbody.findAll if multiple else self.sbody.find
            return finder(division, attrs=attributes)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Page: findin")
Esempio n. 5
0
    def setup_local(self, *args):
        """
        Set up the local gallery filepath according to the root gallery
        folder and other subfolders, creating the folders when missing.

        Args:
            args (str): name of the main gallery local folder, followed by
            optional subfolder names conforming the absolute dirpath

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            wpath (str): the local filepath to the gallery
        """
        try:
            # compose the local working path from the path fragments
            wpath = os.path.join(*args)

            # exist_ok avoids the race between checking and creating
            # the folder, and makes repeated calls idempotent
            os.makedirs(wpath, exist_ok=True)

            return wpath

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: setup_local")
Esempio n. 6
0
    def get_idxurl(self, gsoup, rurl, urle):
        # TODO: remove after implement the Topic() class
        """
        Get the list of the elements inside the gallery index based on
        the root domain url and html div tags.

        Args:
            gsoup (bs-obj): BeautifulSoup object containing the gallery's
            element list
            rurl (str): root URL of the domain to complete the element url
            urle (str): HTML <div> keyword to process the Page's scraped
            gallery urls

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): list with each of the gallery's unique urls
        """
        try:
            # complete every scraped relative link against the root domain
            ans = [urllib.parse.urljoin(rurl, tag.get(urle))
                   for tag in gsoup]

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: get_idxurl")
Esempio n. 7
0
    def newidx(self, cols, data):
        """
        Create a new dataframe in the model based on the column names
        and new data.

        Args:
            cols (list): list of column names to create the new dataframe
            data (list:list, pandas/numpy matrix): data for the columns of
            the new dataframe

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (bool): True if at least one column was loaded into the
            new dataframe, False otherwise
        """
        try:
            # start from an empty frame shaped by the model schema
            self.data_frame = pd.DataFrame(columns=self.schema)

            # pair each column name with its data and load them
            pairs = list(zip(cols, data))
            for name, values in pairs:
                self.data_frame[name] = values

            return len(pairs) > 0

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: newidx")
Esempio n. 8
0
    def get_idxid(self, gsoup, ide, clean):
        # TODO: remove after implement the Topic() class
        """
        Get the unique identifier (ID) of the gallery elements (paints)
        and list them to introduce them into the dataframe.

        Args:
            gsoup (bs-obj): list with gallery elements in Beatiful Soup
            format
            ide (str): HTML <div> keyword to extract the element (paint) ID
            clean (str): substring removed from each raw ID

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): list with the elements (paints) IDs
        """
        try:
            # strip the cleaning keyword out of every scraped ID
            ans = [element.get(ide).replace(clean, "")
                   for element in gsoup]

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: get_idxid")
Esempio n. 9
0
    def clean_dlurl(self, gsoup, rurl, urle):
        # TODO: remove after implement the Topic() class
        """
        Recover the download URL for a gallery element.

        Args:
            gsoup (bs-obj): beatifulSoup object with gallery element list
            rurl (str): domain root URL to complete the gallery index
            urle (str): HTML <div> keyword to scrap the gallery index
            urls to download files

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (str): unique URL with the downloadable element's file,
            None when there is no soup to process
        """
        try:
            # guard clause: nothing to recover without a soup
            if gsoup is None:
                return None

            # complete the relative link against the domain root
            return urllib.parse.urljoin(rurl, gsoup.get(urle))

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: clean_dlurl")
Esempio n. 10
0
    def updata(self, column, data):
        """
        Update a single column with new data; the size of the data needs
        to be the same as the existing records.

        Args:
            column (str): name of the column in the dataframe to update
            data (list/np.array): data to write into the column

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (bool): True when the column now holds the new data
        """
        try:
            # overwrite the column in place
            self.data_frame[column] = data

            # a successful assignment always yields a Series
            return self.data_frame[column] is not None

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: updata")
Esempio n. 11
0
    def save_gallery(self, fn, dfolder):
        """
        Save the in-memory dataframe into a CSV file with UTF-8 encoding.

        Args:
            fn (str): file name with .csv extension
            dfolder (file-object): valid dirpath str or array with
            valid folders.

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (bool): True when pandas reports a successful write
        """
        try:
            # target filepath relative to the working directory
            gfp = os.path.join(os.getcwd(), dfolder, fn)

            # to_csv answers None when writing straight to a file path
            tdata = self.data_frame.to_csv(gfp,
                                           sep=",",
                                           index=False,
                                           encoding="utf-8",
                                           mode="w",
                                           quoting=csv.QUOTE_ALL)
            return tdata is None

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: save_gallery")
Esempio n. 12
0
    def create_localfolders(self, *args):
        """
        Create local subfolders with the gallery folder as root for them.

        Args:
            args: gfolder (str) name of the main gallery folder, and
            coln (str) name of the ID column whose values become the
            folder names

        Raises:
            exp: raise a generic exception if something goes wrong
        """
        try:
            gfolder = args[0]
            coln = args[1]

            # looping throught ID list as folder names for the local gallery
            for folder in self.getdata(coln):

                # exist_ok removes both the racy exists() check and the
                # dead "already exists" branch of the original
                tfp = os.path.join(gfolder, folder)
                os.makedirs(tfp, exist_ok=True)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: create_localfolders")
Esempio n. 13
0
    def export_json(self, gfolder, incol, expcol, fname):
        """
        Export the data from one column of the model's dataframe into a
        JSON file inside each element's local gallery folder.

        Args:
            gfolder (str): name of the main gallery folder
            incol (str): name of the column in the dataframe with the
            gallery index with unique IDs for each elements (same as the
            local folder's names)
            expcol (str): name of the column with the data to export to
            JSON
            fname (str): name of the file to save

        Raises:
            exp: raise a generic exception if something goes wrong
        """
        try:
            # pair every element ID with the data to export for it
            ids = self.getdata(incol)
            payloads = self.getdata(expcol)

            for tindex, tdata in zip(ids, payloads):
                # one JSON file per element folder
                self.write_json(tdata, fname + ".json", gfolder, tindex)
                # short pause between file writes
                time.sleep(DEFAULT_SHORT_SLEEP_TIME)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: export_json")
Esempio n. 14
0
    def write_json(self, data, filename, *args):
        """
        Save a json into a local file according to the gallery folder
        and subfolders.

        Args:
            data (JSON): JSON data to save in file
            filename (str): JSON file name
            args: name of the main gallery folder followed by any
            subfolder names, can be as much as needed

        Raises:
            exp: raise a generic exception if something goes wrong
        """
        try:
            # configuring local filepath
            lfp = os.path.join(*args, filename)

            # saving data with utf-8 encoding; the context manager closes
            # the file, the original's explicit close() was redundant
            with open(lfp, "w", encoding="utf-8") as file:
                file.write(data)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: write_json")
Esempio n. 15
0
    def getdata(self, coln, *args, **kwargs):
        """
        Get the data based on the column name of the model's dataframe.

        Args:
            coln (str): column name of the gallery dataframe to get

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): data from the column name
        """
        try:
            # the gallery model resolves the column lookup
            return self.gallery.getdata(coln, *args, **kwargs)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: getdata")
Esempio n. 16
0
    def __init__(self, *args, **kwargs):
        """
        Controller() class creator

        Args:
            webg_path (str): URL for the gallery to scrap data
            localg_path (str): local dirpath for the gallery data
            imgd_path (str): local dirpath for the gallery images
            schema (list): array with the column names for the model
            gallery (Gallery): object with the gallery dataframe model

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            Controller (Model): return a new Controller() object
        """

        try:
            # Controller default values
            self.webg_path = str()
            self.localg_path = str()
            self.imgd_path = str()
            self.schema = copy.deepcopy(DEFAULT_FRAME_SCHEMA)
            self.gallery = Gallery()
            self.wpage = Page()

            # positional args override the defaults in declaration order
            # (the original iterated range(len(args)) with an if chain)
            # URL of the remote gallery to scrap
            if len(args) > 0:
                self.webg_path = args[0]

            # local dirpath to save the gallery CSV
            if len(args) > 1:
                self.localg_path = args[1]

            # local dirpath to save the images
            if len(args) > 2:
                self.imgd_path = args[2]

            # keyword decorators override the schema and the model
            if "schema" in kwargs:
                self.schema = copy.deepcopy(kwargs["schema"])

            if "model" in kwargs:
                self.gallery = kwargs["model"]

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: __init__")
Esempio n. 17
0
    def load_gallery(self, fn, dfolder):
        """
        Load the gallery from a CSV file in UTF-8 encoding.

        Args:
            fn (str): file name with .csv extension
            dfolder (file-object): valid dirpath str or array with
            valid folders.

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (bool): True when the CSV was read into the dataframe
        """
        try:
            # source filepath relative to the working directory
            gfp = os.path.join(os.getcwd(), dfolder, fn)

            # replace the in-memory dataframe with the CSV contents
            self.data_frame = pd.read_csv(gfp,
                                          sep=",",
                                          encoding="utf-8",
                                          engine="python",
                                          quoting=csv.QUOTE_ALL)
            return self.data_frame is not None

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: load_gallery")
Esempio n. 18
0
    def get_srcimgs(self, sfp, sfext):
        """
        Recover the images inside the localpath using the file extension.

        Args:
            sfp (str): local folderpath of the source image to scan
            sfext (str): source image file extension, ie.: "jpg"

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): list of the source images local filepaths
        """
        try:
            # keep only the files ending with the wanted extension,
            # in the order the filesystem lists them
            ans = [os.path.join(sfp, fname)
                   for fname in os.listdir(sfp)
                   if fname.endswith(sfext)]

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: get_srcimgs")
Esempio n. 19
0
    def __init__(self, *args, **kwargs):
        """
        creator of the class gallery()

        Args:
            webg_path (str): URL for the gallery to scrap data
            localg_path (str): local dirpath for the gallery data
            imgd_path (str): local dirpath for the gallery images
            data_frame (data_frame, optional): panda df with data (ie.:
            paints) in the gallery, you can pass an existing df, Default
            is empty
            schema (list): array with the column names for the model

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            Model (Model): return a new Model() object
        """
        try:

            # default creator attributes
            self.webg_path = str()
            self.localg_path = str()
            self.imgd_path = str()
            self.schema = copy.deepcopy(DEFAULT_FRAME_SCHEMA)
            self.data_frame = pd.DataFrame(columns=DEFAULT_FRAME_SCHEMA)
            self.wpage = Page()

            # positional args override the defaults by position; the
            # original args.index(arg) lookup returned the FIRST matching
            # position, so duplicated values were assigned incorrectly
            # URL of the remote gallery to scrap
            if len(args) > 0:
                self.webg_path = args[0]

            # local dirpath to save the gallery CSV
            if len(args) > 1:
                self.localg_path = args[1]

            # local dirpath to save the images
            if len(args) > 2:
                self.imgd_path = args[2]

            # dataframe containing the data of the gallery
            if len(args) > 3:
                self.data_frame = args[3]

            # a schema keyword resets both the schema and the dataframe
            if "schema" in kwargs:
                self.schema = copy.deepcopy(kwargs["schema"])
                self.data_frame = pd.DataFrame(columns=self.schema)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: __init__")
Esempio n. 20
0
    def clean_imgfn(self, text, elem, clean):
        """
        Recover a clean file name from scraped link text, splitting on a
        keyword and stripping decorators.

        Args:
            text (str): text to be clean
            elem (str): keyword to split the str and process
            clean (str): keyword to clean in the text

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (str): clean file name with extension
        """
        try:
            # keep what follows the keyword, then trim blanks and markers
            tail = text.split(elem)[1]
            return tail.strip().strip(clean)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: clean_imgfn")
Esempio n. 21
0
    def scrapidx(self, gurl, stime, div, attrs):
        """
        Scrap the gallery index and recover all the elements in it.

        Args:
            gurl (str): gallery URL to scrap data
            stime (float): waiting time between requests
            div (str): HTML <div> keyword to search and scrap
            attrs (dict): decorative attributes in the <div> keyword to
            refine the search and scrap

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (bs-obj): div and attrs filtered beatifulsoup object
        """
        try:
            # fresh working page for this scraping round
            self.wpage = Page()

            # download the gallery index, then filter it by div and attrs
            self.wpage.get_collection(gurl, stime)
            return self.wpage.findin(div, attributes=attrs)

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: scrapidx")
Esempio n. 22
0
    def dlpaints(self, *args):
        """
        Download the paint files from the list of available asset urls in
        the gallery.

        Args:
            args: dlurl_coln (str) column name of known download URLs,
            gfolder (str) name of the main gallery folder, div (str) HTML
            <div> search and scrap keyword, attrs (dict) decorative <div>
            keywords to refine the scrap, elem (str) secondary <div>
            keyword, clean (str) cleaning keyword

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): list of boolean marking if it is possible to
            download a picture file or not
        """
        try:
            ans = list()
            gm = self.gallery
            dlurl_coln = args[0]
            gf = args[1]
            div = args[2]
            attrs = args[3]
            elem = args[4]
            clean = args[5]

            for url in self.getdata(dlurl_coln):

                # invalid urls (null/na/none/malformed) are marked False
                if validators.url(str(url)) is not True:
                    ans.append(False)

                else:
                    # recover the image file name from the element page
                    tsoup = gm.get_imgfn(url, div, attrs)
                    # clean the name before saving
                    timgf = gm.clean_imgfn(tsoup, elem, clean)
                    # download the image into the local folder
                    ans.append(gm.get_imgf(gf, url, timgf))

                # pause between requests
                time.sleep(DEFAULT_SLEEP_TIME)

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: dlpaints")
Esempio n. 23
0
    def clean_relwork(self, rurl, soup, elem, clean):
        # TODO: remove after implement the Topic() class
        """
        Process the scraped related-work data from the beatifulSoup
        object into a clean dict of title -> absolute URL.

        Args:
            rurl (str): domain root URL to complete the related-work link
            soup (bs-obj): beatifulSoup object with the related-work data
            elem (str): HTML <div> keyword to scrap the related-work data
            clean (list): secondary <div> to clean the related-work data

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (dict): Element (paint) clean related-work
        """
        try:
            ans = dict()

            # nothing to process without a soup
            if soup is not None:

                # related-work entries live in the first soup element
                relworks = soup[0].findAll(elem)

                # suffix counter used to disambiguate repeated titles
                nth = 1
                for entry in relworks:
                    # clean title text becomes the dict key
                    key = self.clrtext(str(entry.find(clean[0]).string))

                    # absolute link becomes the dict value
                    link = entry.find(clean[1]).get(clean[2])
                    value = str(urllib.parse.urljoin(rurl, link))

                    # repeated titles get a numeric suffix
                    if key in ans.keys():
                        key = "{} {}".format(key, nth)
                        nth += 1

                    # updating answer dict
                    ans.update(copy.deepcopy({key: value}))

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: clean_relwork")
Esempio n. 24
0
    def scrap_relwork(self, *args, **kwargs):
        """
        Scrap the related work data from the webpage using the
        dataframe's column name, the HTML divs and other decorators in
        the url.

        Args:
            args: coln (str) ID column name of the gallery dataframe,
            rurl (str) root URL of the domain, div (str) HTML <div>
            keyword to search and scrap, attrs (dict) decorative
            attributes, elem (str) secondary <div> keyword, clean (list)
            cleaning keywords

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): the list of the related work recovered from the
            gallery elements
        """
        try:
            ans = list()
            gm = self.gallery
            coln = args[0]
            rurl = args[1]
            div = args[2]
            attrs = args[3]
            elem = args[4]
            clean = args[5]

            for url in self.getdata(coln):

                # scrap the related-work section of the element page
                tsoup = gm.scrape(url, div, attrs, **kwargs)

                # empty dict when the page holds no related work
                tans = dict()
                if len(tsoup) > 0:
                    tans = gm.clean_relwork(rurl, tsoup, elem, clean)

                # compose answer and pause between requests
                ans.append(self.to_json(tans))
                time.sleep(DEFAULT_SLEEP_TIME)

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: scrap_relwork")
Esempio n. 25
0
    def export_paints(self, *args):
        """
        Export the images from a source folder into a target folder; the
        target images are in color and in grayscale.

        Args:
            args: coln (str) ID column name of the gallery dataframe,
            sfext (str) source image file extension ie.: "jpg", tfext
            (dict) target image file extension ie.: "jpg", tsufix (dict)
            target image file sufix ie.: "-rgb"

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): the list of dict with the relative localpath
            file for each gallery element
            (ej.: {"rgb": "/Data/Img/s0004V1962r-rgb.jpg",
                    "bw": "/Data/Img/s0004V1962r-b&w.jpg"
                    })
        """
        try:
            ans = list()
            coln = args[0]
            sfext = args[1]
            tfext = args[2]
            tsufix = args[3]
            gm = self.gallery

            for tid in self.getdata(coln):

                # per element source and target folders
                srcf = os.path.join(self.localg_path, tid)
                tgtf = os.path.join(self.imgd_path, tid)

                # recover sources, name the targets, then export
                srcfn = gm.get_srcimgs(srcf, sfext)
                tgtfn = gm.set_tgtimgs(srcfn, tgtf, tfext, tsufix)
                tans = gm.export_imgs(srcfn, tgtfn, tsufix)

                # compose answer and pause between elements
                ans.append(self.to_json(tans))
                time.sleep(DEFAULT_SHORT_SLEEP_TIME)

            # return answer list
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: export_paints")
Esempio n. 26
0
    def clean_searchtags(self, rurl, soup, elem, clean):
        # TODO: remove after implement the Topic() class
        """
        Clean the page's search-tags from the beatifulSoup object.

        Args:
            rurl (str): root URL of the domain to complete the search-tags
            soup (bs-obj): beatifulSoup object with the search-tags data
            elem (str): HTML <div> keyword to scrap the search-tags data
            clean (str): secondary <div> keyword to clean the data from
            the scrap

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (dict): Element (paint) clean search-tags
        """
        try:
            ans = dict()

            # guard: no soup or an empty collection means no tags
            if soup is not None and len(soup) > 0:

                # search-tags <a> live inside the first soup element
                tags = soup[0].findAll(elem)

                # processing the search tags
                if len(tags) > 0 and isinstance(tags, list):

                    for tag in tags:
                        # clean tag text as key, absolute url as value
                        key = self.clrtext(str(tag.string))
                        link = tag.get(clean)
                        value = str(urllib.parse.urljoin(rurl, link))

                        # updating answer dict
                        ans.update(copy.deepcopy({key: value}))

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: clean_searchtags")
# Esempio n. 27
# 0
    def export_shapes(self, coln, tfext, tsufix):
        """
        Export the image shapes from the exported images in the target folder

        Args:
            coln (str): ID column name of the gallery dataframe
            tfext (dict): target image file extension, ie.: "jpg"
            tsufix (dict): target image file sufix, ie.: "-rgb"

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): the list of dict with the shape of each
            gallery element
            (ej.: {"rgb": (450, 280, 3),
                    "bw": (450, 280)})
        """
        try:
            # default answer
            ans = list()

            # working objects: gallery model and local image dirpath
            gm = self.gallery
            ip = self.imgd_path

            # iterating over the index data
            for tid in self.getdata(coln):

                # target folder holding this element's exported images
                tgtf = os.path.join(ip, tid)
                # recovering the exported image filenames
                tgtfn = gm.get_srcimgs(tgtf, tfext)
                # exporting the image shapes
                tans = gm.export_shapes(tgtfn, tsufix)

                # compose answer
                tans = self.to_json(tans)
                ans.append(tans)
                # short pause between elements to avoid hammering resources
                time.sleep(DEFAULT_SHORT_SLEEP_TIME)

            # return answer list
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: export_shapes")
# Esempio n. 28
# 0
    def get_imgf(self, gfolder, dlurl, pfn):
        # TODO: remove after implement the Topic() class
        """
        Save the paint file from the asset URL in the local folder path

        Args:
            gfolder (str): root local dirpath where the file is going to be
            saved
            dlurl (str): url address with the downloadable image file
            pfn (str): filename to save the image

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (bool): True if the file exists in the local dirpath
            (freshly downloaded or already present), False if not
        """
        try:
            # parsing the URL to choose the local folder to save the file;
            # the last path segment of the URL names the subfolder
            imgf = urllib.parse.urlparse(dlurl)
            imgf = imgf.path.split("/")[-1]
            fp = os.path.join(gfolder, imgf, pfn)

            # file already downloaded on a previous run
            if os.path.exists(fp):
                return True

            # saving file from the request content in binary form
            # NOTE(review): assumes self.wpage holds the response fetched
            # for dlurl by a previous request — confirm against caller
            data = self.wpage.content
            with open(fp, "wb") as file:
                file.write(data)

            # file written successfully
            return True

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: get_imgf")
# Esempio n. 29
# 0
    def clean_objdata(self, soup, elem):
        # TODO: remove after implement the Topic() class
        """
        Clean the page's object-data from the BeautifulSoup object

        Args:
            soup (bs-obj): BeautifulSoup object with the object-data
            elem (tuple|list): pair of HTML keywords; elem[0] selects the
            key tags (<dt>) and elem[1] the value tags (<dd>)

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (dict): Element (paint) clean object-data
        """
        try:
            # default answer
            ans = dict()

            # checking if object-data exists
            if soup is not None:

                # finding <dt> (keys) and <dd> (values) in the soup
                keys = soup.findAll(elem[0])
                values = soup.findAll(elem[1])

                # soup keys and values must have data
                if len(keys) > 0 and len(values) > 0:

                    # pairing each <dt> with its <dd> and cleaning the text
                    for key, value in zip(keys, values):
                        ckey = self.clrtext(str(key.string))
                        cvalue = self.clrtext(str(value.string))
                        # plain string key/value, no deep copy needed
                        ans[ckey] = cvalue

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Gallery: clean_objdata")
# Esempio n. 30
# 0
    def scrap_searchtags(self, coln, rurl, div, attrs, elem, clean, **kwargs):
        """
        Scrap the elements (paints) search-tags using the ID column name
        in the index, the domain URL, HTML divisions <divs>, decorative
        attributes, secondary HTML elements and cleaning HTML divisions

        Args:
            coln (str): ID column name of the gallery dataframe
            rurl (str): root URL of the domain to complete the search-tags
            div (str): HTML <div> keyword to scrap the search-tags
            attrs (dict): decorative attributes in the <div> keyword to refine
            elem (str): secondary <div> keyword to refine the search and
            scrap process
            clean (str): secondary <div> keyword to clean the data from
            the scrap

        Raises:
            exp: raise a generic exception if something goes wrong

        Returns:
            ans (list): list of element search-tags in JSON format
        """
        try:
            # default answer and gallery model
            ans = list()
            gm = self.gallery

            # iterating the url list from the dataframe in the model
            for url in self.getdata(coln):
                # scraping elements of each gallery page
                tsoup = gm.scrape(url, div, attrs, **kwargs)
                # extracting the search tags from the soup
                tans = gm.clean_searchtags(rurl, tsoup, elem, clean)

                # compose answer
                tans = self.to_json(tans)
                ans.append(tans)
                # polite delay between page requests
                time.sleep(DEFAULT_SLEEP_TIME)

            # returning answer
            return ans

        # exception handling
        except Exception as exp:
            Err.reraise(exp, "Controller: scrap_searchtags")