Example #1
0
    def test_verify_url(self):
        """Check that verify_url accepts a good URL and rejects bad ones.

        One URL is expected to verify. Three are expected to be rejected:
        a host with no "http://" prefix, a string that is not a URL at all,
        and a good host followed by a bad path.
        """
        accepted = "http://www.google.com"
        # Listed in the order the rejections are asserted.
        rejected = (
            "www.google.com",
            "weofkej",
            "http://www.google.com/-##4@3weo$%*",
        )

        self.assertTrue(verify_url(accepted))

        for candidate in rejected:
            self.assertFalse(verify_url(candidate))
Example #2
0
    def test_verify_url(self):
        """Check that verify_url accepts a good URL and rejects bad ones.

        One URL is expected to verify. Three are expected to be rejected:
        a host with no "http://" prefix, a string that is not a URL at all,
        and a good host followed by a bad path. A confirmation line is
        printed once every assertion has passed.
        """
        accepted = "http://www.google.com"
        # Listed in the order the rejections are asserted.
        rejected = (
            "www.google.com",
            "weofkej",
            "http://www.google.com/-##4@3weo$%*",
        )

        self.assertTrue(verify_url(accepted))

        for candidate in rejected:
            self.assertFalse(verify_url(candidate))

        # Only reached when none of the assertions above raised.
        print("PASSED TEST VERIFY URL")
Example #3
0
def main(args):
    """
    Run the scrape described by an argparse arguments object.

    Returns whatever scrape_webpage returns when both args.url and
    args.directory pass verification, and -1 otherwise.
    """
    # verify_dir is only consulted when verify_url has already succeeded.
    inputs_ok = verify_url(args.url) and verify_dir(args.directory)
    if not inputs_ok:
        return -1

    return scrape_webpage(
        root=args.url,
        dir_=args.directory,
        format_=args.format,
        lang=args.language,
        needs_check=args.check,
    )
    def __init__(self, url, auto_commit=True, unique_key='_id'):
        """Verify Solr URL and establish a connection.

        Args:
            url: Solr URL; checked with verify_url and then passed to Solr().
            auto_commit: When truthy, run_auto_commit() is called after the
                instance attributes have been set.
            unique_key: Stored on the instance as ``unique_key``.

        Raises:
            SystemError: If verify_url(url) returns False.
        """
        # Identity comparison kept as-is: only an explicit False rejects the URL.
        if verify_url(url) is False:
            # Same exception type as before so existing handlers still match;
            # the message makes the failure diagnosable.
            raise SystemError("Invalid Solr URL: %r" % (url,))

        self.solr = Solr(url)
        self.unique_key = unique_key
        self.auto_commit = auto_commit

        if auto_commit:
            self.run_auto_commit()
    def __init__(self, url, auto_commit=True, unique_key='_id'):
        """Verify Elastic URL and establish a connection.

        Args:
            url: Elastic URL; checked with verify_url and then passed to
                ES() as ``server``.
            auto_commit: When truthy, run_auto_commit() is called after the
                instance attributes have been set.
            unique_key: Stored on the instance as ``unique_key``.

        Raises:
            SystemError: If verify_url(url) returns False.
        """
        # Identity comparison kept as-is: only an explicit False rejects the URL.
        if verify_url(url) is False:
            # Same exception type as before so existing handlers still match;
            # the message makes the failure diagnosable.
            raise SystemError("Invalid Elastic URL: %r" % (url,))

        self.elastic = ES(server=url)
        self.auto_commit = auto_commit
        self.doc_type = 'string'  # default type is string, change if needed
        self.unique_key = unique_key

        if auto_commit:
            self.run_auto_commit()
    def __init__(self, url, auto_commit=True, unique_key='_id'):
        """Verify Solr URL and establish a connection.

        Args:
            url: Solr URL; checked with verify_url and then passed to Solr().
            auto_commit: When truthy, run_auto_commit() is called after the
                instance attributes have been set.
            unique_key: Stored on the instance as ``unique_key``.

        Raises:
            SystemError: If verify_url(url) returns False.
        """
        # Identity comparison kept as-is: only an explicit False rejects the URL.
        if verify_url(url) is False:
            # Same exception type as before so existing handlers still match;
            # the message makes the failure diagnosable.
            raise SystemError("Invalid Solr URL: %r" % (url,))

        self.solr = Solr(url)
        self.unique_key = unique_key
        self.auto_commit = auto_commit

        if auto_commit:
            self.run_auto_commit()
    def __init__(self, url, auto_commit=True, unique_key='_id'):
        """Verify Elastic URL and establish a connection.

        Args:
            url: Elastic URL; checked with verify_url and then passed to
                ES() as ``server``.
            auto_commit: When truthy, run_auto_commit() is called after the
                instance attributes have been set.
            unique_key: Stored on the instance as ``unique_key``.

        Raises:
            SystemError: If verify_url(url) returns False.
        """
        # Identity comparison kept as-is: only an explicit False rejects the URL.
        if verify_url(url) is False:
            # Same exception type as before so existing handlers still match;
            # the message makes the failure diagnosable.
            raise SystemError("Invalid Elastic URL: %r" % (url,))

        self.elastic = ES(server=url)
        self.auto_commit = auto_commit
        self.doc_type = 'string'  # default type is string, change if needed
        self.unique_key = unique_key

        if auto_commit:
            self.run_auto_commit()