def lookup_cik(ticker, name=None):
    """Given a ticker symbol, retrieves the CIK."""
    ticker = ticker.strip().upper()

    # First try the SEC. In theory, this should work for all known symbols,
    # even deactivated ones. In practice, it fails for many, even active ones.
    url = (
        'http://www.sec.gov/cgi-bin/browse-edgar?'
        'CIK={cik}&'
        'owner=exclude&'
        'Find=Find+Companies&'
        'action=getcompany'
    ).format(cik=ticker)
    request = urllib2.Request(url=url, headers={'User-agent': get_user_agent()})
    response = urllib2.urlopen(request)
    data = response.read()
    try:
        match = next(re.finditer(r'CIK=([0-9]+)', data))
        return match.group().split('=')[-1]
    except StopIteration:
        pass

    # Next, try the SEC's other CIK lookup form. It doesn't always work with
    # just the ticker, so we also pass in the company name; it's the next most
    # accurate after the first. Unfortunately, this search is sensitive to
    # punctuation in the company name, which we might not have stored
    # correctly, so we start searching with everything we have and then back
    # off to widen the search.
    name = (name or '').strip()
    name = ''.join(c for c in name if ord(c) < 128)
    if name:
        name_parts = name.split(' ')
        for i in six.moves.xrange(len(name_parts)):
            url = 'http://www.sec.gov/cgi-bin/cik.pl.c?company={company}'\
                .format(company='+'.join(name_parts[:-(i + 1)]))
            request = urllib2.Request(url=url,
                                      headers={'User-agent': get_user_agent()})
            response = urllib2.urlopen(request)
            data = response.read()
            matches = re.findall(r'CIK=([0-9]+)', data)
            if len(matches) == 1:
                return matches[0]

    # If the SEC search doesn't find anything, try Yahoo. This should work
    # for all active symbols, but won't work for any deactivated ones.
    url = 'http://finance.yahoo.com/q/sec?s={symbol}+SEC+Filings'.format(
        symbol=ticker)
    request = urllib2.Request(url=url, headers={'User-agent': get_user_agent()})
    response = urllib2.urlopen(request)
    data = response.read()
    try:
        match = next(re.finditer(r'search/\?cik=([0-9]+)', data))
        return match.group().split('=')[-1]
    except StopIteration:
        pass
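# The snippets above and below mix urllib, urllib2, and six.moves.urllib for
# the same job. A minimal sketch of a version-agnostic request helper,
# assuming six is available; `fetch_url` is illustrative and not part of any
# snippet's original API:
from six.moves.urllib import request as _url_request

def fetch_url(url, user_agent=None):
    # Build a Request with an optional User-Agent header and return the raw
    # body; works unchanged on Python 2 (urllib2) and Python 3.
    headers = {'User-agent': user_agent} if user_agent else {}
    req = _url_request.Request(url, headers=headers)
    resp = _url_request.urlopen(req)
    try:
        return resp.read()
    finally:
        resp.close()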
def validate_url(self, value):
    if not URLType.URL_REGEX.match(value):
        raise StopValidation(self.messages['invalid_url'])
    if self.verify_exists:
        from six.moves import urllib
        try:
            # Request/urlopen live in the `request` submodule of
            # six.moves.urllib.
            request = urllib.request.Request(value)
            urllib.request.urlopen(request)
        except Exception:
            raise StopValidation(self.messages['not_found'])
def _download_shapefiles(file_name):
    file_parts = file_name.split("/")
    file_prefix = file_parts[-1].split(".")[0]
    exts = [".shp", ".dbf", ".shx"]
    for ext in exts:
        # Rebuild the URL for each shapefile component.
        file_name = file_prefix + ext
        file_parts[-1] = file_name
        new_url = "/".join(file_parts)
        u = urllib.urlopen(new_url)
        f = open(file_name, 'wb')
        meta = u.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        # Download in fixed-size blocks, tracking progress.
        file_size_dl = 0
        block_sz = 8192
        while True:
            bf = u.read(block_sz)
            if not bf:
                break
            file_size_dl += len(bf)
            f.write(bf)
            # Progress string (built with backspaces for in-place display;
            # the original never actually prints it).
            status = r"%10d  [%3.2f%%]" % (
                file_size_dl, file_size_dl * 100. / file_size)
            status = status + chr(8) * (len(status) + 1)
        f.close()
def downloadVideo(self, _id, logs=True):
    video = self.vdata[_id]
    url = video['url']
    stime = video['start time']
    etime = video['end time']
    sfname = "%s/%d.mp4" % (self.vdir, _id)
    if os.path.exists(sfname):
        if logs:
            print("Video Id [%d] Already Downloaded" % _id)
        return sfname
    youtubeId = self.getYoutubeId(url)
    turl = "curl 'https://hesetube.com/download.php?id=%s'" % (youtubeId)
    durl = "https://hesetube.com/video/%s.mp4?start=%f&end=%f" % (
        youtubeId, stime, etime)
    print(durl)
    print(turl)
    os.system(turl)
    cont = urllib.urlopen(durl).read()
    with open(sfname, "wb") as f:
        f.write(cont)
    print("Video Id [%d] Downloaded : %s " % (_id, youtubeId))
    fs = os.path.getsize(sfname)
    if fs < VideoHandler.STHRES:
        # Downloaded file is too small to be a real video: log it, remove
        # it, and leave an empty placeholder so we don't retry endlessly.
        print("Crosscheck failed, File Size : %d" % fs)
        with open(self.logfile, "a") as f:
            f.write("Crosscheck file %d, %s with size %d\n" %
                    (_id, youtubeId, fs))
        os.remove(sfname)
        open(sfname, 'a').close()
        self.takebreak()
        return None
    else:
        self.takebreak()
        return sfname
def callback():
    # FIXME: Setting a timeout, either globally using
    # socket.setdefaulttimeout() or in urlopen(), doesn't appear to work on
    # Mac 10.5 with Python 2.7. For now we ignore the timeout value and hope
    # for the best.
    request = urllib.Request(self._url, data, {'Content-Type': content_type})
    return urllib.urlopen(request)
def check_for_software_updates(flash_message=False):
    """Check for a new release of Invenio.

    :return: True if you have the latest version, else False if you need to
        upgrade, or None if the server was not reachable.
    """
    from invenio.config import CFG_VERSION
    from invenio.base.i18n import _
    try:
        find = re.compile(r'Invenio v[0-9]+\.[0-9]+\.[0-9]+(\-rc[0-9])?'
                          r' is released')
        webFile = urllib.urlopen("http://invenio-software.org/repo"
                                 "/invenio/tree/RELEASE-NOTES")
        temp = ""
        version = ""
        version1 = ""
        while 1:
            temp = webFile.readline()
            match1 = find.match(temp)
            try:
                version = match1.group()
                break
            except AttributeError:
                # No match on this line; keep scanning.
                pass
            if not temp:
                break
        webFile.close()
        submatch = re.compile(r'[0-9]+\.[0-9]+\.[0-9]+(\-rc[0-9])?')
        version1 = submatch.search(version)
        web_version = version1.group().split(".")
        local_version = CFG_VERSION.split(".")
        if (web_version[0] > local_version[0] or
                web_version[0] == local_version[0] and
                web_version[1] > local_version[1] or
                web_version[0] == local_version[0] and
                web_version[1] == local_version[1] and
                web_version[2] > local_version[2]):
            if flash_message:
                flash(_('A newer version of Invenio is available for '
                        'download. You may want to visit %s') %
                      ('<a href="http://invenio-software.org/wiki'
                       '/Installation/Download">http://invenio-software.org'
                       '/wiki/Installation/Download</a>'), 'warning')
            return False
    except Exception as e:
        print(e)
        if flash_message:
            flash(_('Cannot download or parse release notes from http://'
                    'invenio-software.org/repo/invenio/tree/RELEASE-NOTES'),
                  'error')
        return None
    return True
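# Note: the comparison above compares version components as strings, which
# misorders multi-digit parts ('10' < '9'). A sketch of a numeric comparison
# for the same 'X.Y.Z(-rcN)?' format; `_version_tuple` is a hypothetical
# helper, not part of Invenio's API:
def _version_tuple(version):
    # Drop any '-rcN' suffix and compare the numeric components as integers.
    core = version.split('-')[0]
    return tuple(int(part) for part in core.split('.'))

# e.g. _version_tuple('2.10.0') > _version_tuple('2.9.9')  ->  True,
# whereas the string comparison '10' > '9' is False.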
def Search(self, request, context):
    query = request.query
    query = urllib.urlencode({'q': query})
    response = urllib.urlopen(
        'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&' + query
    ).read()
    _json = json.loads(response)
    results = _json['responseData']['results']
    for result in results:
        yield result
def test_image_crop(self):
    import six.moves.urllib as urllib
    from PIL import Image
    import io
    # Image bytes need a binary buffer: io.BytesIO works on both Python 2
    # and 3 (the Py2-only StringIO.StringIO breaks on Python 3), and
    # urlopen lives in the `request` submodule of six.moves.urllib.
    file = io.BytesIO(
        urllib.request.urlopen(
            'https://avatars2.githubusercontent.com/u/5404851?s=600&v=4'
        ).read())
    img = Image.open(file)
    img = img.crop((0, 0, 400, 600))
def download_trained_weights(coco_model_path, verbose=1):
    """Download COCO trained weights from Releases.

    coco_model_path: local path of COCO trained weights
    """
    if verbose > 0:
        print("Downloading pretrained model to " + coco_model_path + " ...")
    with urllib.urlopen(COCO_MODEL_URL) as resp, \
            open(coco_model_path, 'wb') as out:
        shutil.copyfileobj(resp, out)
    if verbose > 0:
        print("... done downloading pretrained model!")
def get_record(self, id):
    """For the mathematician in this object, extract the list of advisor
    ids, the mathematician name, the mathematician institution, and the
    year of the mathematician's degree.
    """
    url = 'http://genealogy.math.ndsu.nodak.edu/id.php?id=' + str(id)
    page = urllib.urlopen(url)
    soup = BeautifulSoup(page, convertEntities='html')
    page.close()
    return get_record_from_tree(soup, id)
def call(self, url=False, data=None, method=None):
    """Send requests to 18000 server.

    Send HTTPS call, get response in JSON.
    Convert response into Python Object and return it.
    """
    opener = urllib.build_opener(urllib.HTTPCookieProcessor(self.cookie))
    urllib.install_opener(opener)
    res_json = None

    try:
        urllib.socket.setdefaulttimeout(720)
        req = urllib.Request(url, data, self.headers)
        if method:
            req.get_method = lambda: method
        res = urllib.urlopen(req).read().decode("utf-8")
        if "xx/sessions" not in url:
            LOG.info(_LI('\n\n\n\nRequest URL: %(url)s\n\n'
                         'Call Method: %(method)s\n\n'
                         'Request Data: %(data)s\n\n'
                         'Response Data:%(res)s\n\n'),
                     {'url': url, 'method': method,
                      'data': data, 'res': res})
    except Exception as err:
        LOG.error(_LE('Bad response from server: %(url)s.'
                      ' Error: %(err)s'), {'url': url, 'err': err})
        # Build the fallback error document; the original assigned a tuple
        # here instead of applying the % operator.
        json_msg = ('{"error":{"code": %s,"description": "Connect to '
                    'server error."}}' % constants.ERROR_CONNECT_TO_SERVER)
        res_json = json.loads(json_msg)
        return res_json

    try:
        res_json = json.loads(res)
    except Exception as err:
        LOG.error(_LE('JSON transfer error: %s.'), err)
        raise

    return res_json
def _uri_reader(uri):
    j = json.load(urllib.urlopen(uri))
    return j
BODY_WIDTH = options.body_width
GOOGLE_LIST_INDENT = options.list_indent

# process input
if len(args) > 0:
    file_ = args[0]
    encoding = None
    if len(args) == 2:
        encoding = args[1]
    if len(args) > 2:
        p.error('Too many arguments')

    if file_.startswith('http://') or file_.startswith('https://'):
        baseurl = file_
        j = urllib.urlopen(baseurl)
        text = j.read()
        if encoding is None:
            try:
                from feedparser import _getCharacterEncoding as enc
            except ImportError:
                enc = lambda x, y: ('utf-8', 1)
            encoding = enc(j.headers, text)[0]
            if encoding == 'us-ascii':
                encoding = 'utf-8'
        data = text.decode(encoding)
    else:
        data = open(file_, 'rb').read()
        if encoding is None:
            try:
def _load_network_resource(url):
    with contextlib.closing(urllib2.urlopen(url, timeout=60.0)) as f:
        # We fully rely on the mime type reported by the remote server b/c
        # according to the spec it MUST support the RAML mime type.
        mime_type = f.headers.gettype()
        return f.read(), mime_type
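# Note: `headers.gettype()` exists only on Python 2's mimetools.Message; on
# Python 3 the response headers are an email.message.Message exposing
# get_content_type(). A sketch of a version-neutral accessor; `_content_type`
# is a hypothetical helper, not part of the original module:
def _content_type(headers):
    # Prefer the Py2 accessor, fall back to the Py3 one.
    try:
        return headers.gettype()
    except AttributeError:
        return headers.get_content_type()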
def download(arguments):
    """Downloads and uncompresses meta data generated files from Idiap

    Parameters:

      arguments (argparse.Namespace): A set of arguments passed by the
        command-line parser

    Returns:

      int: A POSIX compliant return value of ``0`` if the download is
        successful, or ``1`` in case it is not.

    Raises:

      IOError: if metafiles exist and ``--force`` was not passed

      urllib2.HTTPError: if the target resource does not exist on the
        webserver
    """

    # What should happen as a combination of flags. Legend:
    #
    # 0 - Exit, with status 0
    # X - Download, overwrite if there
    # R - Raise exception, err
    #
    # +----------+-----------+----------+--------+
    # | complete | --missing | --force  |  none  |
    # +----------+-----------+----------+--------+
    # |   yes    |     0     |    X     |   R    |
    # +----------+-----------+----------+--------+
    # |   no     |     X     |    X     |   X    |
    # +----------+-----------+----------+--------+

    if not arguments.files:
        print("Skipping download of metadata files for bob.db.%s: no files "
              "declared" % arguments.name)

    # Check we're complete in terms of metafiles
    complete = True
    for p in arguments.files:
        if not os.path.exists(p):
            complete = False
            break

    if complete:
        if arguments.missing:
            print("Skipping download of metadata files for `bob.db.%s': "
                  "complete" % arguments.name)
            return 0
        elif arguments.force:
            print("Re-downloading metafiles for `bob.db.%s'" % arguments.name)
        else:
            raise IOError("Metadata files are already available. Remove "
                          "metadata files before attempting download or "
                          "--force")

    # if you get here, all files aren't there, unpack
    source_url = os.path.join(arguments.source, arguments.name + ".tar.bz2")
    target_dir = arguments.test_dir  # test case

    if not target_dir:  # puts files on the root of the installed package
        import pkg_resources
        try:
            target_dir = pkg_resources.resource_filename(
                'bob.db.%s' % arguments.name, '')
        except ImportError as e:
            raise ImportError(
                "The package `bob.db.%s' is not currently installed. N.B.: "
                "The database and package names **must** match. Your package "
                "should be named `bob.db.%s', if the driver name for your "
                "database is `%s'. Check." % (3 * (arguments.name,)))

    # download file from Idiap server, unpack and remove it
    import sys
    import tempfile
    import tarfile
    import pkg_resources
    from .utils import safe_tarmembers

    if sys.version_info[0] <= 2:
        import urllib2 as urllib
    else:
        import urllib.request as urllib

    print("Extracting url `%s' into `%s'" % (source_url, target_dir))
    u = urllib.urlopen(source_url)
    f = tempfile.NamedTemporaryFile(suffix=".tar.bz2")
    open(f.name, 'wb').write(u.read())
    t = tarfile.open(fileobj=f, mode='r:bz2')
    members = list(safe_tarmembers(t))
    for k, m in enumerate(members):
        print("x [%d/%d] %s" % (k + 1, len(members), m.name))
        t.extract(m, target_dir)
    t.close()
    f.close()
def __init__(self, metadata_dir, accept_empty=True):
    """Initialize reader to metadata channel directory.

    Channel parameters are read from the attributes of the top-level file
    'dmd_properties.h5' in the `metadata_dir`.

    Parameters
    ----------
    metadata_dir : string
        Path to metadata channel directory, which contains a
        'dmd_properties.h5' file and timestamped subdirectories containing
        data.

    accept_empty : bool, optional
        If True, do not raise an IOError if the 'dmd_properties.h5' file is
        empty. If False, raise an IOError in that case and delete the empty
        'dmd_properties.h5' file.

    Raises
    ------
    IOError
        If 'dmd_properties.h5' file is not found in `metadata_dir` or if
        `accept_empty` is False and the 'dmd_properties.h5' file is empty.

    """
    self._metadata_dir = metadata_dir
    if self._metadata_dir.find('http://') != -1:
        self._local = False
        # put properties file in /tmp/dmd_properties_%i.h5 % (pid)
        url = os.path.join(self._metadata_dir, 'dmd_properties.h5')
        try:
            f = urllib.urlopen(url)
        except (urllib.URLError, urllib.HTTPError):
            url = os.path.join(self._metadata_dir, 'metadata.h5')
            f = urllib.urlopen(url)
        tmp_file = os.path.join(
            '/tmp', 'dmd_properties_%i.h5' % (os.getpid()),
        )
        # Binary mode: the payload is an HDF5 file (the original opened in
        # text mode, which corrupts it on some platforms).
        fo = open(tmp_file, 'wb')
        fo.write(f.read())
        f.close()
        fo.close()
    else:
        self._local = True
        # list and match first properties file
        tmp_file = next(
            (f for f in sorted(
                glob.glob(
                    os.path.join(
                        metadata_dir, list_drf.GLOB_DMDPROPFILE,
                    ))) if re.match(list_drf.RE_DMDPROP, f)),
            None,
        )
        if tmp_file is None:
            raise IOError('dmd_properties.h5 not found')

    with h5py.File(tmp_file, 'r') as f:
        try:
            subdir_cadence = f.attrs['subdir_cadence_secs'].item()
            file_cadence = f.attrs['file_cadence_secs'].item()
        except KeyError:
            # maybe an older version with subdirectory_cadence_seconds
            # and file_cadence_seconds
            subdir_cadence = f.attrs['subdirectory_cadence_seconds'].item()
            file_cadence = f.attrs['file_cadence_seconds'].item()
        self._subdir_cadence_secs = subdir_cadence
        self._file_cadence_secs = file_cadence
        try:
            try:
                spsn = f.attrs['sample_rate_numerator'].item()
                spsd = f.attrs['sample_rate_denominator'].item()
            except KeyError:
                # maybe an older version with samples_per_second_*
                spsn = f.attrs['samples_per_second_numerator'].item()
                spsd = f.attrs['samples_per_second_denominator'].item()
        except KeyError:
            # must have an older version with samples_per_second attribute
            sps = f.attrs['samples_per_second'].item()
            spsfrac = fractions.Fraction(sps).limit_denominator()
            self._samples_per_second = numpy.longdouble(sps)
            self._sample_rate_numerator = int(spsfrac.numerator)
            self._sample_rate_denominator = int(spsfrac.denominator)
        else:
            self._sample_rate_numerator = spsn
            self._sample_rate_denominator = spsd
            # have to go to uint64 before longdouble to ensure correct
            # conversion from int
            self._samples_per_second = (
                numpy.longdouble(numpy.uint64(
                    self._sample_rate_numerator)) /
                numpy.longdouble(numpy.uint64(
                    self._sample_rate_denominator)))
        self._file_name = f.attrs['file_name']
        try:
            version = f.attrs['digital_metadata_version']
        except KeyError:
            # version is before 2.3 when attribute was added
            version = '2.0'
        self._digital_metadata_version = version
        self._check_compatible_version()
        try:
            fields_dataset = f['fields']
        except KeyError:
            if not accept_empty:
                os.remove(tmp_file)
                errstr = ('No metadata yet written to %s, removing empty'
                          ' "dmd_properties.h5"')
                raise IOError(errstr % self._metadata_dir)
            else:
                self._fields = None
                return
        self._fields = []
        for i in range(len(fields_dataset)):
            self._fields.append(fields_dataset[i]['column'])

    if not self._local:
        os.remove(tmp_file)
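# The reader above retries several attribute spellings for backward
# compatibility. A sketch of that fallback pattern factored into a helper;
# `_first_attr` is hypothetical, not part of the digital_metadata API:
def _first_attr(attrs, names):
    # Return the first attribute value found among `names`; raise KeyError
    # if none are present.
    for name in names:
        try:
            return attrs[name].item()
        except KeyError:
            continue
    raise KeyError('none of %r found' % (names,))

# e.g. subdir_cadence = _first_attr(
#     f.attrs, ('subdir_cadence_secs', 'subdirectory_cadence_seconds'))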
def fetch(url):
    return urllib.urlopen(url).read()
def scrape(file_name, number_of_images):
    f = open(file_name, 'r')
    url_file = f.read()
    url_list = url_file.split('\n')
    index = 0
    matrices = dict()
    matrices["up"] = list()
    matrices["left"] = list()
    matrices["right"] = list()
    matrices["down"] = list()
    for url in url_list:
        # Each line is tab-separated; the URL is the second field. (The
        # original rebound `url_list` here, shadowing the list being
        # iterated.)
        fields = url.split('\t')
        real_url = fields[1]
        try:
            file = cStringIO.StringIO(urllib.urlopen(real_url).read())
            img = Image.open(file)
            opencvImage = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
            width = opencvImage.shape[0]
            height = opencvImage.shape[1]
            x = int(height / 2) - 64
            y = int(width / 2) - 64
            crop_image_height = 32
            crop_image_width = 32
            # Crop a centered patch (the original used crop_image_height
            # for both axes) and convert to grayscale.
            opencv_image = opencvImage[y:y + crop_image_height,
                                       x:x + crop_image_width]
            opencv_image = cv2.cvtColor(opencv_image, cv2.COLOR_BGR2GRAY)

            image_utils.saveImage("up.jpg", opencv_image)
            for up_row in opencv_image:
                matrices["up"].append(up_row)

            left_image = image_utils.rotate(opencv_image, 90)
            image_utils.saveImage("left.jpg", left_image)
            for left_row in left_image:
                matrices["left"].append(left_row)

            right_image = image_utils.rotate(opencv_image, 270)
            image_utils.saveImage("right.jpg", right_image)
            for right_row in right_image:
                matrices["right"].append(right_row)

            down_image = image_utils.rotate(opencv_image, 180)
            image_utils.saveImage("down.jpg", down_image)
            for down_row in down_image:
                matrices["down"].append(down_row)

            # last_row = list()
            # for _ in range(0, 31):
            #     last_row.append(-1)
            # matrices["up"].append(last_row)
            # matrices["left"].append(last_row)
            # matrices["right"].append(last_row)
            # matrices["down"].append(last_row)

            print(index)
            index += 1
            if index >= number_of_images:
                break
        except Exception:
            continue
    return matrices
def auto_download(self, dataDir, dataType, dataYear):
    """Download the COCO dataset/annotations if requested.

    dataDir: The root directory of the COCO dataset.
    dataType: What to load (train, val, minival, valminusminival)
    dataYear: What dataset year to load (2014, 2017) as a string, not an
        integer

    Note:
        For 2014, use "train", "val", "minival", or "valminusminival"
        For 2017, only "train" and "val" annotations are available
    """
    # Setup paths and file names
    if dataType == "minival" or dataType == "valminusminival":
        imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
        imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
        imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(
            "val", dataYear)
    else:
        imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
        imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
        imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(
            dataType, dataYear)
    # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)

    # Create main folder if it doesn't exist yet
    if not os.path.exists(dataDir):
        os.makedirs(dataDir)

    # Download images if not available locally
    if not os.path.exists(imgDir):
        os.makedirs(imgDir)
        print("> Downloading images to " + imgZipFile + " ...")
        with urllib.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
            shutil.copyfileobj(resp, out)
        print("... done downloading.")
        print("> Unzipping " + imgZipFile)
        with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
            zip_ref.extractall(dataDir)
        print("... done unzipping")
    print("> Will use images in " + imgDir)

    # Setup annotations data paths
    annDir = "{}/annotations".format(dataDir)
    if dataType == "minival":
        annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
        annFile = "{}/instances_minival2014.json".format(annDir)
        annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
        unZipDir = annDir
    elif dataType == "valminusminival":
        annZipFile = "{}/instances_valminusminival2014.json.zip".format(
            dataDir)
        annFile = "{}/instances_valminusminival2014.json".format(annDir)
        annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
        unZipDir = annDir
    else:
        annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
        annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
        annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(
            dataYear)
        unZipDir = dataDir
    # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)

    # Download annotations if not available locally
    if not os.path.exists(annDir):
        os.makedirs(annDir)
    if not os.path.exists(annFile):
        if not os.path.exists(annZipFile):
            print("> Downloading zipped annotations to " + annZipFile + " ...")
            with urllib.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
                shutil.copyfileobj(resp, out)
            print("... done downloading.")
        print("> Unzipping " + annZipFile)
        with zipfile.ZipFile(annZipFile, "r") as zip_ref:
            zip_ref.extractall(unZipDir)
        print("... done unzipping")
    print("> Will use annotations in " + annFile)
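# auto_download() above repeats the same download-and-unzip sequence for
# images and annotations. A sketch of that step factored into a helper,
# assuming the same os/shutil/zipfile/urllib imports; `_fetch_and_unzip` is
# hypothetical, not part of the original class:
def _fetch_and_unzip(url, zip_path, extract_dir):
    # Download `url` to `zip_path` if not already present, then extract the
    # archive into `extract_dir`.
    if not os.path.exists(zip_path):
        with urllib.urlopen(url) as resp, open(zip_path, 'wb') as out:
            shutil.copyfileobj(resp, out)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_dir)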
file_path = os.path.join(folder, the_file)
try:
    if os.path.isfile(file_path):
        os.unlink(file_path)
except Exception as e:
    print(e)

page = BeautifulSoup(open("food.html"))
images = []
for img in page.findAll('img'):
    if 'li=' not in img['src']:
        images.append(img['src'])

i = 0
for img in images:
    path = io.BytesIO(urllib.urlopen(img).read())
    path.seek(0, 2)  # 0 bytes from the end
    size = path.tell()
    if size < 7000:
        continue
    path = os.path.join('./images', '%s.jpg' % i)
    response = urllib.urlretrieve(img, path)
    i += 1

r = redis.StrictRedis(host='localhost', port=6379, db=0)

FLAGS = None

# pylint: disable=line-too-long
topics = ["Table of Contents", "You Will Learn", " Course Syllabus", "Learning Objectives", "Learn How To:"] coursetypes = soup.find_all('div', class_="clearfix block block-views") for coursetype in coursetypes: coursetype = coursetype.find('h2').contents[0] ins = coursetype.find_next('tbody') trall = ins.find_all('tr') for tr in trall: temp = tr.find_next('td') ct = temp.find_next('a') ctcontents = ct.contents[0] url2 = prefix+ct['href'] try: html2= urllib.request.urlopen(url2) except AttributeError: html2 = urllib.urlopen(url) soup2 = bs4.BeautifulSoup(html2, "html.parser") instructor = ct.find_next('td').contents[0] platform = instructor.find_next('td').contents[0] startdate = platform.find_next('span').contents[0] regfees = startdate.find_next('td').contents[0] status = regfees.find_next('td').contents[0] dept = status.find_next('a').contents[0] platform = platform.lstrip().rstrip() status = status.lstrip().rstrip() print(ctcontents,"\n") file.write('Type of course: ') file.write(coursetype) file.write("\nCourse Title: ") file.write(ctcontents.encode('utf-8'))
                #index = cl2.index(content) + 1
                hyip.addPayment_method('Bankwire')
            else:
                six.print_('payment methods parsed')
                six.print_(small2)
                six.print_(cl2)
                six.print_(tabl0)
            output(hyip)
    elif method == 'mechanize':
        six.print_('Not supported yet! Use native or dont define @method at all')
    elif method == 'urllib2':
        six.print_('Not supported yet! Use native or dont define @method at all')
        exit(1)

    # Fetch and parse the goldpoll listing. (`urllib.request` is a module;
    # the Request class must be called, as the inline comments indicate.)
    req = urllib.request.Request(goldpoll_url)  # urllib2.Request(goldpoll_url)
    response = urllib.request.urlopen(req)  # urllib2.urlopen(req)
    the_page = response.read()
    webpage = the_page.decode("ISO-8859-1")
    parser = etree.HTMLParser()
    tree = etree.fromstring(webpage, parser)
    elements_c10 = tree.xpath('//table[@class="cl0"]')
    six.print_(elements_c10)

if 'popularhyip' in sites:
    if method == 'static':
        dir_separator = ('\\' if is_windows() else '/')
        doc = html.parse('input' + dir_separator + geckoname)
        #print etree.tostring(doc)
        elements_status1 = doc.xpath('//tr[@class="status1" and (not(@id))]')
        six.print_(len(elements_status1))
        for element in elements_status1:
            six.print_('')