def __init__(self, origin_name, uid, description):
    """Set up the importer and register its origin in the web database.

    Args:
        origin_name: Name of the origin; stored on the instance and passed
            to ``WebDatabase.add_origin``.
        uid: Identifier stored on the instance and passed to
            ``WebDatabase.add_origin``.
        description: Text passed to ``WebDatabase.add_origin`` only; it is
            not kept on the instance.
    """
    # The import is local to the constructor in the original code
    # (presumably to dodge a circular import -- confirm before hoisting).
    from Controllers import WebDatabase

    # No archive has been downloaded yet.
    self.file_path = ''

    database = WebDatabase()
    self.web_db = database

    self.uid = uid
    self.origin_name = origin_name

    database.add_origin(origin_name, uid, description)
class UrlImporter:
    """Download a tar archive of URL lists and load it into the web database.

    Every non-empty line found in the extracted files is handed to
    ``WebDatabase.add_web`` under the origin given at construction time.
    """

    def __init__(self, origin_name, uid, description):
        """Create the database handle and register the origin.

        Args:
            origin_name: Name of the origin; stored and passed to
                ``add_origin`` and later to every ``add_web`` call.
            uid: Identifier stored and passed to ``add_origin`` and later to
                every ``add_web`` call.
            description: Text passed to ``add_origin`` only.
        """
        # Kept as a function-level import, as in the original code
        # (presumably to avoid a circular import -- confirm before moving).
        from Controllers import WebDatabase

        self.file_path = ''
        self.web_db = WebDatabase()
        self.uid = uid
        self.origin_name = origin_name
        self.web_db.add_origin(origin_name, uid, description)

    def get_url(self, url, file_path):
        """Download *url* to a local file and return that file's path.

        Args:
            url: Address handed to ``urllib2.urlopen``.
            file_path: Destination path. An empty value selects the default
                ``/tmp/urls.tar.gz``.

        Returns:
            The path the response body was written to.
        """
        # Compare by value: ``is not ''`` tested object identity, which only
        # works by accident of string interning.
        self.file_path = file_path if file_path else '/tmp/urls.tar.gz'

        response = urllib2.urlopen(url)
        try:
            # The whole body is read into memory before being written out.
            with open(self.file_path, 'wb') as output:
                output.write(response.read())
        finally:
            # try/finally instead of ``with`` because Python 2 responses are
            # not context managers.
            response.close()
        return self.file_path

    def import_tar_file(self, file):
        """Extract the archive *file* and import every file found below it.

        The archive is unpacked into ``/tmp/nanny`` and that directory is then
        walked in full, so anything already present there is imported as well.

        Args:
            file: Path of the tar archive to open.
        """
        tar = tarfile.open(file)
        try:
            # The archive comes from the network, so members that would land
            # outside the extraction directory are dropped before extracting.
            tar.extractall('/tmp/nanny',
                           members=self._safe_members(tar, '/tmp/nanny'))
        finally:
            tar.close()

        for root, _dirs, names in os.walk('/tmp/nanny/'):
            for name in names:
                path = root + '/' + name
                # The folder directly containing the file is the category.
                category = path.split('/')[-2]
                # The entry type is the file name minus its last character
                # (e.g. 'urls' -> 'url'); presumably this singularizes the
                # list name -- confirm against the archive layout.
                entry_type = name[:-1]
                self.import_file(category, entry_type, path)

    @staticmethod
    def _safe_members(tar, destination):
        """Yield the members of *tar* whose paths stay inside *destination*.

        Only member names are checked; symbolic and hard link members are not
        inspected. NOTE(review): tighten this if archives may contain links.
        """
        base = os.path.realpath(destination)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(base, member.name))
            if target == base or target.startswith(base + os.sep):
                yield member

    def import_file(self, category, type, filepath):
        """Add every non-empty line of *filepath* to the web database.

        Args:
            category: Category value forwarded to ``add_web``.
            type: Entry type value forwarded to ``add_web``.
            filepath: Path of the text file to read, one entry per line.
        """
        with open(filepath) as handle:
            for raw_line in handle:
                # Strip only line terminators. Slicing off the last character
                # corrupted a final line that lacks a newline and left '\r'
                # behind on CRLF files.
                entry = raw_line.rstrip('\r\n')
                if self.is_a_url(entry):
                    self.web_db.add_web(False, 'black', self.uid, category,
                                        type, self.origin_name,
                                        self.origin_name, entry)

    def is_a_url(self, url):
        """Return True when *url* is non-empty, False otherwise.

        No further validation of the text is performed.
        """
        return len(url) > 0
class UrlImporter:
    """Fetch a tar archive of URL lists and import it into the web database.

    Each non-empty line of every extracted file is passed to
    ``WebDatabase.add_web`` together with the origin supplied to the
    constructor.
    """

    def __init__(self, origin_name, uid, description):
        """Create the database handle and register the origin.

        Args:
            origin_name: Name of the origin; stored and passed to
                ``add_origin`` and later to every ``add_web`` call.
            uid: Identifier stored and passed to ``add_origin`` and later to
                every ``add_web`` call.
            description: Text passed to ``add_origin`` only.
        """
        # Kept as a function-level import, as in the original code
        # (presumably to avoid a circular import -- confirm before moving).
        from Controllers import WebDatabase

        self.file_path = ""
        self.web_db = WebDatabase()
        self.uid = uid
        self.origin_name = origin_name
        self.web_db.add_origin(origin_name, uid, description)

    def get_url(self, url, file_path):
        """Download *url* to a local file and return that file's path.

        Args:
            url: Address handed to ``urllib2.urlopen``.
            file_path: Destination path. An empty value selects the default
                ``/tmp/urls.tar.gz``.

        Returns:
            The path the response body was written to.
        """
        # Test the value, not the identity: ``is not ""`` depends on string
        # interning and is not a reliable emptiness check.
        if file_path:
            self.file_path = file_path
        else:
            self.file_path = "/tmp/urls.tar.gz"

        response = urllib2.urlopen(url)
        try:
            # The whole body is read into memory before being written out.
            with open(self.file_path, "wb") as output:
                output.write(response.read())
        finally:
            # try/finally instead of ``with`` because Python 2 responses are
            # not context managers.
            response.close()
        return self.file_path

    def import_tar_file(self, file):
        """Extract the archive *file* and import every file found below it.

        The archive is unpacked into ``/tmp/nanny`` and that directory is then
        walked in full, so anything already present there is imported as well.

        Args:
            file: Path of the tar archive to open.
        """
        tar = tarfile.open(file)
        try:
            # The archive comes from the network, so members that would land
            # outside the extraction directory are dropped before extracting.
            tar.extractall("/tmp/nanny",
                           members=self._safe_members(tar, "/tmp/nanny"))
        finally:
            tar.close()

        for root, _dirs, names in os.walk("/tmp/nanny/"):
            for name in names:
                path = root + "/" + name
                # The folder directly containing the file is the category.
                category = path.split("/")[-2]
                # The entry type is the file name minus its last character
                # (e.g. "urls" -> "url"); presumably this singularizes the
                # list name -- confirm against the archive layout.
                entry_type = name[:-1]
                self.import_file(category, entry_type, path)

    @staticmethod
    def _safe_members(tar, destination):
        """Yield the members of *tar* whose paths stay inside *destination*.

        Only member names are checked; symbolic and hard link members are not
        inspected. NOTE(review): tighten this if archives may contain links.
        """
        base = os.path.realpath(destination)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(base, member.name))
            if target == base or target.startswith(base + os.sep):
                yield member

    def import_file(self, category, type, filepath):
        """Add every non-empty line of *filepath* to the web database.

        Args:
            category: Category value forwarded to ``add_web``.
            type: Entry type value forwarded to ``add_web``.
            filepath: Path of the text file to read, one entry per line.
        """
        with open(filepath) as handle:
            for raw_line in handle:
                # Strip only line terminators. Slicing off the last character
                # corrupted a final line that lacks a newline and left "\r"
                # behind on CRLF files.
                entry = raw_line.rstrip("\r\n")
                if self.is_a_url(entry):
                    self.web_db.add_web(False, "black", self.uid, category,
                                        type, self.origin_name,
                                        self.origin_name, entry)

    def is_a_url(self, url):
        """Return True when *url* is non-empty, False otherwise.

        No further validation of the text is performed.
        """
        return len(url) > 0
def __init__(self, origin_name, uid, description):
    """Initialise importer state and record the origin in the web database.

    Args:
        origin_name: Origin name; kept on the instance and forwarded to
            ``WebDatabase.add_origin``.
        uid: Identifier kept on the instance and forwarded to
            ``WebDatabase.add_origin``.
        description: Text forwarded to ``WebDatabase.add_origin``; not stored
            on the instance.
    """
    # Function-level import as in the original code (presumably working
    # around a circular import -- confirm before moving it to module level).
    from Controllers import WebDatabase

    # Empty until an archive has been downloaded.
    self.file_path = ""

    db_handle = WebDatabase()
    self.web_db = db_handle

    self.uid = uid
    self.origin_name = origin_name

    db_handle.add_origin(origin_name, uid, description)