Esempio n. 1
0
    def __init__(self, origin_name, uid, description):
        """Set up the importer and register its origin in the web database.

        ``origin_name`` and ``uid`` are stored on the instance; together
        with ``description`` they are forwarded to
        ``WebDatabase.add_origin()``.
        """
        # Imported at call time rather than at module load (presumably to
        # avoid an import cycle -- confirm).
        from Controllers import WebDatabase

        database = WebDatabase()

        self.origin_name = origin_name
        self.uid = uid
        self.file_path = ''
        self.web_db = database

        database.add_origin(origin_name, uid, description)
Esempio n. 2
0
class UrlImporter:
    """Fetch a tar archive of list files and load its entries into the
    web database.

    Every importer is tied to one origin (``origin_name`` / ``uid``), which
    is registered through ``WebDatabase.add_origin()`` on construction.
    """

    def __init__(self, origin_name, uid, description):
        """Create the importer and register its origin.

        Args:
            origin_name: Stored on the instance and passed to add_origin()
                and, later, to every add_web() call.
            uid: Stored on the instance and passed to add_origin() and,
                later, to every add_web() call.
            description: Passed to add_origin() only.
        """
        # Imported at call time rather than at module load (presumably to
        # avoid an import cycle -- confirm).
        from Controllers import WebDatabase

        self.file_path = ''
        self.web_db = WebDatabase()
        self.uid = uid
        self.origin_name = origin_name

        self.web_db.add_origin(origin_name, uid, description)

    def get_url(self, url, file_path):
        """Download ``url`` to a local file and return that file's path.

        Args:
            url: Address handed to urllib2.urlopen().
            file_path: Destination path. When empty (or None) the download
                is written to '/tmp/urls.tar.gz'.

        Returns:
            The path the data was written to (also kept in self.file_path).
        """
        # Test the value, not object identity: `is not ''` only worked by
        # accident of string interning.
        self.file_path = file_path or '/tmp/urls.tar.gz'

        response = urllib2.urlopen(url)
        try:
            # The context manager closes the file even if the read fails.
            with open(self.file_path, 'wb') as target:
                target.write(response.read())
        finally:
            response.close()

        return self.file_path

    def import_tar_file(self, file):
        """Unpack the archive ``file`` and import every file found in it.

        The archive is extracted under '/tmp/nanny'. For each file below
        that directory, the name of its parent directory is used as the
        category and the file name minus its last character as the type;
        both are handed to import_file().

        Args:
            file: Path (or anything tarfile.open() accepts) of the archive.
        """
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive, so an archive from an untrusted source can write outside
        # '/tmp/nanny'. Validate members (or use an extraction filter where
        # the running Python provides one) before relying on this.
        tar = tarfile.open(file)
        try:
            tar.extractall('/tmp/nanny')
        finally:
            tar.close()

        # NOTE: the directory is never emptied, so files left over from an
        # earlier extraction are walked and imported as well.
        for root, _dirs, names in os.walk('/tmp/nanny/'):
            # Last folder of the path is the category ('' for files that
            # sit directly in the extraction directory).
            category = os.path.basename(root)
            for name in names:
                # The type is the file name without its final character.
                list_type = name[:-1]
                self.import_file(category, list_type,
                                 os.path.join(root, name))

    def import_file(self, category, type, filepath):
        """Add every non-empty line of ``filepath`` to the web database.

        Args:
            category: Passed through to add_web().
            type: Passed through to add_web(). The name shadows the builtin
                but is kept so keyword callers keep working.
            filepath: Path of the text file to read, one entry per line.
        """
        with open(filepath) as handle:
            for raw_line in handle:
                # Strip only the line terminator. Slicing off the last
                # character truncated a final line that had no newline and
                # left a stray '\r' on CRLF-terminated lines.
                entry = raw_line.rstrip('\r\n')
                if self.is_a_url(entry):
                    self.web_db.add_web(False, 'black', self.uid, category,
                                        type, self.origin_name,
                                        self.origin_name, entry)

    def is_a_url(self, url):
        """Return True when ``url`` is non-empty, False otherwise.

        No URL syntax validation is performed; only emptiness is checked.
        """
        return len(url) > 0
Esempio n. 3
0
class UrlImporter:
    """Fetch a tar archive of list files and load its entries into the
    web database.

    Every importer is tied to one origin (``origin_name`` / ``uid``), which
    is registered through ``WebDatabase.add_origin()`` on construction.
    """

    def __init__(self, origin_name, uid, description):
        """Create the importer and register its origin.

        Args:
            origin_name: Stored on the instance and passed to add_origin()
                and, later, to every add_web() call.
            uid: Stored on the instance and passed to add_origin() and,
                later, to every add_web() call.
            description: Passed to add_origin() only.
        """
        # Imported at call time rather than at module load (presumably to
        # avoid an import cycle -- confirm).
        from Controllers import WebDatabase

        self.file_path = ""
        self.web_db = WebDatabase()
        self.uid = uid
        self.origin_name = origin_name

        self.web_db.add_origin(origin_name, uid, description)

    def get_url(self, url, file_path):
        """Download ``url`` to a local file and return that file's path.

        Args:
            url: Address handed to urllib2.urlopen().
            file_path: Destination path. When empty (or None) the download
                is written to "/tmp/urls.tar.gz".

        Returns:
            The path the data was written to (also kept in self.file_path).
        """
        # Test the value, not object identity: `is not ""` only worked by
        # accident of string interning.
        self.file_path = file_path or "/tmp/urls.tar.gz"

        response = urllib2.urlopen(url)
        try:
            # The context manager closes the file even if the read fails.
            with open(self.file_path, "wb") as target:
                target.write(response.read())
        finally:
            response.close()

        return self.file_path

    def import_tar_file(self, file):
        """Unpack the archive ``file`` and import every file found in it.

        The archive is extracted under "/tmp/nanny". For each file below
        that directory, the name of its parent directory is used as the
        category and the file name minus its last character as the type;
        both are handed to import_file().

        Args:
            file: Path (or anything tarfile.open() accepts) of the archive.
        """
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive, so an archive from an untrusted source can write outside
        # "/tmp/nanny". Validate members (or use an extraction filter where
        # the running Python provides one) before relying on this.
        tar = tarfile.open(file)
        try:
            tar.extractall("/tmp/nanny")
        finally:
            tar.close()

        # NOTE: the directory is never emptied, so files left over from an
        # earlier extraction are walked and imported as well.
        for root, _dirs, names in os.walk("/tmp/nanny/"):
            # Last folder of the path is the category ("" for files that
            # sit directly in the extraction directory).
            category = os.path.basename(root)
            for name in names:
                # The type is the file name without its final character.
                list_type = name[:-1]
                self.import_file(category, list_type, os.path.join(root, name))

    def import_file(self, category, type, filepath):
        """Add every non-empty line of ``filepath`` to the web database.

        Args:
            category: Passed through to add_web().
            type: Passed through to add_web(). The name shadows the builtin
                but is kept so keyword callers keep working.
            filepath: Path of the text file to read, one entry per line.
        """
        with open(filepath) as handle:
            for raw_line in handle:
                # Strip only the line terminator. Slicing off the last
                # character truncated a final line that had no newline and
                # left a stray "\r" on CRLF-terminated lines.
                entry = raw_line.rstrip("\r\n")
                if self.is_a_url(entry):
                    self.web_db.add_web(
                        False, "black", self.uid, category, type,
                        self.origin_name, self.origin_name, entry)

    def is_a_url(self, url):
        """Return True when ``url`` is non-empty, False otherwise.

        No URL syntax validation is performed; only emptiness is checked.
        """
        return len(url) > 0
Esempio n. 4
0
    def __init__(self, origin_name, uid, description):
        """Build the importer and announce its origin to the web database.

        ``origin_name`` and ``uid`` are remembered on the instance; both,
        plus ``description``, are handed to ``WebDatabase.add_origin()``.
        """
        # Imported at call time rather than at module load (presumably to
        # avoid an import cycle -- confirm).
        from Controllers import WebDatabase

        database = WebDatabase()

        self.origin_name = origin_name
        self.uid = uid
        self.file_path = ""
        self.web_db = database

        database.add_origin(origin_name, uid, description)