Exemple #1
0
    def analyze(cls, something):
        """ Accept a given input (e.g. a URL, file path, or file handle)
        and determine how to normalize it into an ``Ingestor`` while
        generating metadata.

        Recognised inputs, checked in this order: an instance of ``cls``,
        a string (URL or path), a ``requests.Response``, an
        ``HTTPResponse``, an object exposing ``geturl`` and ``info``, and
        finally any object with a ``read`` attribute.

        Returns an iterable of ``cls`` instances: a one-element tuple, a
        generator with one instance per item of ``directory_files`` when
        the path is a directory, or an empty list when the input is not
        recognised. """
        if isinstance(something, cls):
            return (something, )

        if isinstance(something, basestring):
            # Treat strings as paths or URLs
            url = urlparse(something)
            scheme = url.scheme.lower()
            if scheme in ('http', 'https'):
                # Fetch it; the response is wrapped by the requests branch
                # further down.
                something = requests.get(something)
            elif scheme in ('file', ''):
                finalpath = url.path
                # A file URL such as file:///C:/dir parses to the path
                # "/C:/dir", so the leading slash is dropped when osname
                # is 'nt'. Only strip when the slash is actually present;
                # otherwise a path without one would lose its first
                # character.
                if osname == 'nt' and finalpath.startswith('/'):
                    finalpath = finalpath[1:]
                upath = fullpath(finalpath)
                if path.isdir(upath):
                    return (cls(file_name=f) for f in directory_files(upath))
                return (cls(file_name=upath), )
            # Any other scheme falls through with ``something`` still a
            # string and ends at the final ``return []``.

        # Python requests
        if isinstance(something, requests.Response):
            fd = StringIO(something.content)
            return (cls(file_obj=fd,
                        meta={
                            'http_status': something.status_code,
                            'http_headers': clean_headers(something.headers),
                            'source_url': something.url
                        }), )

        if isinstance(something, HTTPResponse):
            # The URL may not be available on an HTTPResponse, so fall back
            # to None instead of raising AttributeError.
            return (cls(file_obj=something,
                        meta={
                            'http_status': something.status,
                            'http_headers':
                            clean_headers(something.getheaders()),
                            'source_url': getattr(something, 'url', None)
                        }), )

        if hasattr(something, 'geturl') and hasattr(something, 'info'):
            # assume urllib or urllib2
            return (cls(file_obj=something,
                        meta={
                            'http_status': something.getcode(),
                            'http_headers': clean_headers(something.headers),
                            'source_url': something.url
                        }), )

        if hasattr(something, 'read'):
            # Fileobj will be a bit bland
            return (cls(file_obj=something), )

        return []
Exemple #2
0
    def analyze(cls, something):
        """ Accept a given input (e.g. a URL, file path, or file handle)
        and determine how to normalize it into an ``Ingestor`` while
        generating metadata.

        The input is matched, in order, against: an instance of ``cls``,
        a string (URL or path), a ``requests.Response``, an
        ``HTTPResponse``, an object with both ``geturl`` and ``info``, and
        lastly any object with a ``read`` attribute.

        The result is always iterable: a one-element tuple, a generator
        producing one instance per item of ``directory_files`` for a
        directory path, or an empty list for unrecognised input. """
        if isinstance(something, cls):
            return (something, )

        if isinstance(something, basestring):
            # Treat strings as paths or URLs
            url = urlparse(something)
            scheme = url.scheme.lower()
            if scheme in ('http', 'https'):
                # Download now; the requests branch below builds the result.
                something = requests.get(something)
            elif scheme in ('file', ''):
                finalpath = url.path
                # When osname is 'nt', file:///C:/dir yields "/C:/dir" and
                # the leading slash must go. Guard on the slash being there
                # so a path without one keeps its first character.
                if osname == 'nt' and finalpath.startswith('/'):
                    finalpath = finalpath[1:]
                upath = fullpath(finalpath)
                if path.isdir(upath):
                    return (cls(file_name=f) for f in directory_files(upath))
                return (cls(file_name=upath),)
            # Strings with any other scheme are not handled here and end
            # up at the final ``return []``.

        # Python requests
        if isinstance(something, requests.Response):
            fd = StringIO(something.content)
            return (cls(file_obj=fd, meta={
                'http_status': something.status_code,
                'http_headers': clean_headers(something.headers),
                'source_url': something.url
            }), )

        if isinstance(something, HTTPResponse):
            # An HTTPResponse may not carry its URL; use None rather than
            # failing with AttributeError when the attribute is missing.
            return (cls(file_obj=something, meta={
                'http_status': something.status,
                'http_headers': clean_headers(something.getheaders()),
                'source_url': getattr(something, 'url', None)
            }), )

        if hasattr(something, 'geturl') and hasattr(something, 'info'):
            # assume urllib or urllib2
            return (cls(file_obj=something, meta={
                'http_status': something.getcode(),
                'http_headers': clean_headers(something.headers),
                'source_url': something.url
            }), )

        if hasattr(something, 'read'):
            # Fileobj will be a bit bland
            return (cls(file_obj=something), )

        return []
Exemple #3
0
 def __init__(self, path=None, **kwargs):
     """Store the normalized path; reject existing non-directories.

     ``path`` is passed through ``fullpath`` and kept as ``self.path``.
     Extra keyword arguments are accepted but not used here.

     Raises ``ValueError`` when the location exists and is not a
     directory. A location that does not exist is accepted.
     """
     self.path = fullpath(path)
     # Validate the normalized location that is actually stored, so the
     # check cannot disagree with ``self.path`` (assumes ``fullpath``
     # returns a filesystem path string - TODO confirm).
     if os.path.exists(self.path) and not os.path.isdir(self.path):
         # Report the caller's original value; the tuple keeps
         # %-formatting safe whatever type ``path`` is.
         raise ValueError('Not a directory: %s' % (path, ))
Exemple #4
0
 def __init__(self, path=None, **kwargs):
     """Remember the normalized path and refuse non-directory targets.

     The ``path`` argument goes through ``fullpath`` and the result is
     stored on ``self.path``. Additional keyword arguments are accepted
     and ignored by this method.

     Raises ``ValueError`` if the location exists but is not a
     directory; a missing location passes the check.
     """
     self.path = fullpath(path)
     # Check the stored, normalized value instead of the raw argument so
     # validation and ``self.path`` always refer to the same location
     # (assumes ``fullpath`` returns a path string - TODO confirm).
     normalized = self.path
     if os.path.exists(normalized) and not os.path.isdir(normalized):
         # The message shows what the caller passed in; wrapping it in a
         # tuple avoids %-formatting surprises for non-string values.
         raise ValueError('Not a directory: %s' % (path, ))