Example #1
    def run(self):
        global cnt
        while True:
            lock.acquire()
            try:
                d = target_file.next()
            except StopIteration:
                # no more input lines to process
                lock.release()
                break

            sp = d.strip().split()
            dirname = "down/%s" % sp[1] 
            try:
                os.makedirs(dirname)
            except OSError:
                # directory already exists
                pass
            cnt += 1
            if cnt % 10000 == 0:
                print "downloading %d" % cnt
            lock.release()

            try:
                wget.download(sp[2], out=dirname, bar=None)
            except Exception:
                print("error downloading %s" % sp[2])
            if cnt >= 100:
                break
Example #2
def DownloadData():
    """ Downloads the shapefiles """
    tar_dir = "./tars"
    for dir in [shape_dir, tar_dir]:
        if not os.path.exists(dir):
            os.makedirs(dir)

    base_url = "http://census.edina.ac.uk/ukborders/easy_download/prebuilt/shape/"
    files_list = ["England_ct_2011_gen_clipped.tar.gz",
                  "England_ol_2011_gen_clipped.tar.gz",
                  "Wales_ct_1991_gen3.tar.gz",
                  "Wales_ol_2011_gen_clipped.tar.gz",
                  "Scotland_dt_1991.tar.gz",
                  "Scotland_ol_1991.tar.gz",
                  "Gb_dt_2009_10.tar.gz",
                  "Gb_wpc_2010_05.tar.gz"
                  ]
    print("Downloading shape files...")
    for f in files_list:
        if not os.path.exists(os.path.join(tar_dir, f)):
            url = base_url + f
            print("Downloading {}".format(url))
            wget.download(url, out=tar_dir)
            print()

        print("Unpacking {}".format(f))
        tar = tarfile.open(os.path.join(tar_dir, f))
        tar.extractall(path=shape_dir)
        tar.close()

    print("Done, all shape-files stored in {}".format(shape_dir))
Example #3
def load_sqlite(table, dbase, url=False, out=False):
    """
    Retrieve triples from an sqlite3 database.
    """
    if url:
        # check for short path to url
        if url.startswith('http://') or url.startswith('https://'):
            pass
        else:
            url = 'http://tsv.lingpy.org/triples/'+url
            print(url)
            
        # check if file already exists
        if os.path.isfile(dbase):
            os.rename(
                    dbase,
                    dbase+'-backup-'+str(datetime.datetime.now()).split('.')[0]
                    )
        wget.download(url, out=dbase)
        
    db = sqlite3.connect(dbase)
    cursor = db.cursor()

    cursor.execute('select * from '+table+';')

    data = cursor.fetchall()
    
    return lingpy.basic.ops.triple2tsv(data, output='dict')
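A brief usage sketch for load_sqlite above; the table and database names are hypothetical and only illustrate the call signature:

# Hypothetical names: fetch 'my_triples.sqlite3' via the short-URL form (expanded
# to http://tsv.lingpy.org/triples/...), back up any existing local copy, then
# return the contents of table 'triples' as a dict.
triples = load_sqlite('triples', 'my_triples.sqlite3', url='my_triples.sqlite3')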
Example #4
def __make_icon_osx():
    lisa_shortcut = op.expanduser("~/Desktop/lisa")
    if not os.path.exists(lisa_shortcut):
        with open(lisa_shortcut, 'w') as outfile:
            outfile.write(
            "\
#!/bin/bash\n\
export PATH=$HOME/miniconda2/bin:$HOME/anaconda2/bin:$HOME/miniconda/bin:$HOME/anaconda/bin:$PATH\n\
lisa"
            )
        # rwx for user and group, r-x for others
        os.chmod(lisa_shortcut,
                 stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH |
                 stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH |
                 stat.S_IWUSR | stat.S_IWGRP
                 )

    import wget
    lisa_icon_path= op.expanduser("~/lisa_data/.lisa/LISA256.icns")
    if not os.path.exists(lisa_icon_path):
        try:
            wget.download(
                "https://raw.githubusercontent.com/mjirik/lisa/master/applications/LISA256.icns",
                out=lisa_icon_path
            )
        except:
            logger.warning('logo download failed')
            pass
Example #5
    def updateFiles(self):

        print "Update Files"
        # Clean out file array
        self.data.params.files = []

        # Always assume that the most up to date runtime is not yet available
        runtime = ((self.current_time.hour-6) / 6) * 6	# Get the Model Runtime
        if runtime < 0:
            runtime = 0

        launch_time_offset = self.launch_time - self.current_time

        # For now, if the prediction takes place in the past... don't
        if launch_time_offset < timedelta(0):
            launch_time_offset = timedelta(0)

        prediction_offset = (launch_time_offset.seconds / 3600 / 3) * 3

        ### NOTE THIS ISN'T DONE!
        self.data.params.files.append("./wind/49-43-290-294-%04d%02d%02d%02d-gfs.t%02dz.mastergrb2f%02d" % (self.current_time.year, self.current_time.month, self.current_time.day, prediction_offset, runtime, prediction_offset))
        if not os.path.isfile("./wind/49-43-290-294-%04d%02d%02d%02d-gfs.t%02dz.mastergrb2f%02d" % (self.current_time.year, self.current_time.month, self.current_time.day, prediction_offset, runtime, prediction_offset)):
            download_url = "http://nomads.ncep.noaa.gov/cgi-bin/filter_gfs_hd.pl?file=gfs.t%02dz.mastergrb2f%02d&leftlon=290&rightlon=294&toplat=49&bottomlat=43&dir=%%2Fgfs.%04d%02d%02d%02d%%2Fmaster" % (runtime, prediction_offset, self.launch_time.year, self.launch_time.month, self.launch_time.day, runtime)
            print download_url
            print (runtime, prediction_offset, self.current_time.year, self.current_time.month, self.current_time.day, runtime)
            file = wget.download(download_url)
            shutil.move(file, './wind/49-43-290-294-%04d%02d%02d%02d-%s' % (self.current_time.year, self.current_time.month, self.current_time.day, prediction_offset, file))
        self.data.params.files.append("./wind/49-43-290-294-%04d%02d%02d%02d-gfs.t%02dz.mastergrb2f%02d" % (self.current_time.year, self.current_time.month, self.current_time.day, prediction_offset+3, runtime, prediction_offset+3))
        if not os.path.isfile("./wind/49-43-290-294-%04d%02d%02d%02d-gfs.t%02dz.mastergrb2f%02d" % (self.current_time.year, self.current_time.month, self.current_time.day, prediction_offset+3, runtime, prediction_offset+3)):
            download_url = "http://nomads.ncep.noaa.gov/cgi-bin/filter_gfs_hd.pl?file=gfs.t%02dz.mastergrb2f%02d&leftlon=290&rightlon=294&toplat=49&bottomlat=43&dir=%%2Fgfs.%04d%02d%02d%02d%%2Fmaster" % (runtime, prediction_offset+3, self.current_time.year, self.current_time.month, self.current_time.day, runtime)
            file = wget.download(download_url)
            shutil.move(file, './wind/49-43-290-294-%04d%02d%02d%02d-%s' % (self.current_time.year, self.current_time.month, self.current_time.day, prediction_offset+3, file))
Example #6
    def download_song(self, songTitle, song_ID, download_dir):

        song_ID = str(song_ID)
        download_url = (musicCrawler.music_download_url + song_ID + ".mp3")
        song_local_path = os.path.join(download_dir, songTitle)

        print(30 * "-")
        print("Downloading Song: " + songTitle)

        # handle exception if mp3 file is not found on url source

        try:
            wget.download(download_url, download_dir)  # download the mp3 file from url to download directory

        except Exception:
            print("Song ", songTitle + " Not Found")
            pass

        # join the song ID with the download dir to get the song title path
        song_ID = (song_ID+".mp3")
        song_ID_path = os.path.join(download_dir, song_ID)
        song_title_path = os.path.join(download_dir, songTitle)

        try:
            print("\n""Parsing Song: " + songTitle)
            shutil.move(song_ID_path, song_title_path)  # parse the song id with actual song name
            print(30 * "-")
        except FileNotFoundError:
            print("Song ID ", song_ID + " Not Found")
            pass
Example #7
def download_file(filename, destination):
    """Download remote file using the `wget` Python module."""
    destdir = os.path.split(destination)[0]
    if not os.path.isdir(destdir):
        os.makedirs(destdir)
    url = get_remote_url(filename)
    wget.download(url, out=destination)
Example #8
 def _download(self, url, checksum, dst):
     import wget
     retries = 0
     while retries < 2:
         if not os.path.exists(dst):
             retries += 1
             try:
                 wget.download(url, out=dst)
                 print
             except Exception as e:
                 print
                 print 'error', e
                 continue
         h = hashlib.sha1()
         with open(dst, 'rb') as fp:
             while True:
                 d = fp.read(4 * 1024 * 1024)
                 if not d:
                     break
                 h.update(d)
         if h.hexdigest() == checksum:
             break
         print 'sha1 does not match: %s instead of %s' % (h.hexdigest(), checksum)
         os.unlink(dst)
     assert os.path.exists(dst), 'could not successfully retrieve %s' % url
Example #9
    def download_gif(self, term, slide_num):
        # If we have at least 3 local gifs, use one of those
        if (term in self.gifs) and (len(self.gifs[term]) > 3):
            return os.path.join("GIFs", "%s.gif" % random.choice(self.gifs[term]))

        try:
            # Download the gif
            #img = translate(term, app_key=self.GIPHY_API_KEY)
            img = translate(term)
            
            image_path = os.path.join(self.resources_dir, "%d.gif" % slide_num)
            wget.download(img.media_url, image_path)

            if not (term in self.gifs):
                self.gifs[term] = []

            if not (img.id in self.gifs[term]):
                self.gifs[term].append(img.id)
                shutil.copy(image_path, os.path.join("GIFs", "%s.gif" % img.id))
                with open(os.path.join("GIFs", "hashes.json"), "w") as f:
                    json.dump(self.gifs, f, indent=2)

            return image_path
        except:
            return None
Example #10
    def get_astrometry(self):
        """
        Interact with astrometry.net to get the WCS for our image, using the 
         astrometry client code.
        """
        # connect to astrometry.net
        supernova_key = 'jhvrmcmwgufgmsrw'
        supernova_url = 'http://supernova.astrometry.net/api/'
        nova_key = 'tugzsuwnbcykkeuy'
        nova_url = 'http://nova.astrometry.net/api/'
        new_image = self.image.replace('.fits','.wcs.fits')

        # The short routines below are pulled from astrometryClient/client.py in the __main__
        c = anClient(apiurl=nova_url)
        c.login(nova_key)
        
        # upload the image
        print '\n\nUploading image to astrometry.net\n\n'
        kwargs = {'publicly_visible': 'y', 'allow_modifications': 'd', 'allow_commercial_use': 'd'}
        upres = c.upload(self.image, **kwargs)
        stat = upres['status']
        if stat != 'success':
            raise IOError('Upload failed: status %s\n %s\n' %(str(stat), str(upres)))
        subID = upres['subid']
        print '\n\nUpload successful. Submission id:',subID,'\n\n'
        
        # Wait for the response
        while True:
            stat = c.sub_status(subID, justdict=True)
            jobs = stat.get('jobs', [])
            if len(jobs):
                for j in jobs:
                    if j is not None:
                        break
                if j is not None:
                    print '\n\nReceived job id',j,'\n\n'
                    jobID = j
                    break
            time.sleep(5)

        # wait for the calculation to finish
        success = False
        while True:
            stat = c.job_status(jobID, justdict=True)
            if stat.get('status','') in ['success']:
                success = (stat['status'] == 'success')
                break
            time.sleep(5)
        if not success:
            raise IOError('astrometry.net query failed: status %s'%str(stat))
        
        # download the new image
        print '\n\nGrabbing solved image\n\n'
        url = nova_url.replace('api','new_fits_file/%i' %jobID)
        try:
            os.remove( new_image )
        except OSError:
            pass
        wget.download( url, out=new_image )
        self.image = new_image
Example #11
def main(argv=None):
    if argv is None:
        argv = sys.argv

    print('Creating simple wiki serialized corpus')
    # Download the raw file if we do not have it already
    if not os.path.isfile(WIKIFILE):
        # Get the file
        wget.download(WIKIURL)
    wiki = WikiCorpus(WIKIFILE, lemmatize=False)
    i = 0
    article_dict = {}
    for text in wiki.get_texts(meta=True):
        url_string = 'https://simple.wikipedia.org/wiki/?curid={}'
        article_dict[i] = (url_string.format(text[0]), text[1])
        i += 1
    with open(ARTICLEDICT, 'w') as f:
        json.dump(article_dict, f)
    wiki.dictionary.filter_extremes(no_below=20, no_above=0.1,
                                    keep_n=DEFAULT_DICT_SIZE)
    MmCorpus.serialize(MMFILE, wiki, progress_cnt=10000, )
    wiki.dictionary.save_as_text(DICTFILE)
    print('Simple wiki serialized corpus created')
    # Now run LSI
    dictionary = Dictionary.load_from_text(DICTFILE)
    mm = MmCorpus(MMFILE)
    tfidf = TfidfModel(mm, id2word=dictionary, normalize=True)
    tfidf.save(TDIFMODEL)
    MmCorpus.serialize(TDIFFILE, tfidf[mm], progress_cnt=10000)
    mm_tdif = MmCorpus(TDIFFILE)
    lsi = LsiModel(mm_tdif, id2word=dictionary, num_topics=300)
    index = similarities.MatrixSimilarity(lsi[mm_tdif])
    index.save(SIMMATRIX)
    lsi.save(LSIMODEL)
    print("LSI model and index created")
Example #12
def run(argv):
    if not os.path.exists(clean_filepath):
        print('dbsnp will be stored at {!r}'.format(clean_filepath))
        if not os.path.exists(raw_filepath):

            # dbSNP downloads are described at <https://www.ncbi.nlm.nih.gov/variation/docs/human_variation_vcf/>
            # This file includes chr-pos-ref-alt-rsid and 4X a bunch of useless columns:
            dbsnp_url = 'ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b{}_GRCh37p13/VCF/00-All.vcf.gz'.format(dbsnp_version)

            print('Downloading dbsnp!')
            make_basedir(raw_filepath)
            raw_tmp_filepath = get_tmp_path(raw_filepath)
            wget.download(url=dbsnp_url, out=raw_tmp_filepath)
            print('')
            os.rename(raw_tmp_filepath, raw_filepath)
            print('Done downloading.')

        print('Converting {} -> {}'.format(raw_filepath, clean_filepath))
        make_basedir(clean_filepath)
        clean_tmp_filepath = get_tmp_path(clean_filepath)
        run_script(r'''
        gzip -cd '{raw_filepath}' |
        grep -v '^#' |
        perl -F'\t' -nale 'print "$F[0]\t$F[1]\t$F[2]\t$F[3]\t$F[4]"' | # Gotta declare that it's tab-delimited, else it's '\s+'-delimited I think.
        gzip > '{clean_tmp_filepath}'
        '''.format(raw_filepath=raw_filepath, clean_tmp_filepath=clean_tmp_filepath))
        os.rename(clean_tmp_filepath, clean_filepath)

    print("dbsnp is at '{clean_filepath}'".format(clean_filepath=clean_filepath))
Example #13
def download_if_needed(url, filename):

    """ Downloads the data from a given URL, if not already present in the directory, or displays any of the following:
        1. The file already exists
        2. URL does not exist
        3. Server is not responding
    """

    if os.path.exists(filename):
        explanation = filename+ ' already exists'
        return explanation
    else:
        try:
            r = urlopen(url)
        except URLError as e:
            r = e
        if r.code < 400:
            wget.download(url)
            explanation = 'downloading'
            return explanation
        elif r.code>=400 and r.code<500:
            explanation = 'Url does not exist'
            return explanation
        else:
            explanation = 'Server is not responding'
            return explanation
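A short usage sketch for download_if_needed; the URL and filename here are hypothetical:

# Skips the download when 'data.csv' is already present, otherwise probes the
# URL with urlopen before handing it to wget, and reports what happened.
status = download_if_needed('https://example.com/data.csv', 'data.csv')
print(status)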
Example #14
def download_bigbird_models():

	if not os.path.exists(RAW_DOWNLOAD_DIR):
		os.mkdir(RAW_DOWNLOAD_DIR)

	url = "http://rll.berkeley.edu/bigbird/aliases/772151f9ac/"
	req = urllib2.Request(url)
	res = urllib2.urlopen(req)

	html_split = res.read().split()

	model_names = []
	for txt in html_split:
	    if "/bigbird/images" in txt:
	        model_names.append(txt[29:-5])
	
	for model_name in model_names:
		print ""
		print model_name
		if not os.path.exists(RAW_DOWNLOAD_DIR + '/' + model_name):
			if os.path.exists(os.getcwd() + '/' + TAR_FILE_NAME):
				os.remove(os.getcwd() + '/' + TAR_FILE_NAME)

			download_url = "http://rll.berkeley.edu/bigbird/aliases/772151f9ac/export/" + model_name + "/" + TAR_FILE_NAME
			wget.download(download_url)
			t = tarfile.open(os.getcwd() + '/' + TAR_FILE_NAME, 'r')
			t.extractall(RAW_DOWNLOAD_DIR)
Example #15
def _parse_page_urls_and_make_url_list(url_list, credentials, downloaddir, file_extns_of_intrest):

    for url in url_list:
        if credentials != None:
            page_url = _convert_url_to_url_with_password(url, credentials)
        else:
            page_url = url

        logger.info("downloading " + page_url)

        try:
            # Remove any previously existing temp file. This is needed because if a
            # file already exists, wget mangles the name and creates a file with a
            # different name, which would then have to be passed to BS4 and eventually
            # deleted as well, so just delete it beforehand.
            if os.path.exists(TEMP_DOWNLOAD_FILE):
                os.remove(TEMP_DOWNLOAD_FILE)
            wget.download(page_url, TEMP_DOWNLOAD_FILE, bar=_download_progress_bar)
            soup = BeautifulSoup(open(TEMP_DOWNLOAD_FILE))

            links = soup.findAll(ANCHOR_TAG)

            _make_list_of_download_candidates(page_url, links, downloaddir, file_extns_of_intrest)
        except Exception, e:
            logger.error("Exception: " + str(e))
Example #16
def download_videos(m3u8_obj):
	print '[log] downloading videos'
	base_uri = m3u8_obj._base_uri

	for file in m3u8_obj.files:
		video_url = base_uri + "/" + file
		wget.download(video_url)
Example #17
def gdb():
    try:
        gdb = dict()
        pre1 = "http://sourceware.org/gdb/current/onlinedocs/"
        pre2 = "https://sourceware.org/gdb/talks/esc-west-1999/"
        gdb[1] = pre1 + "gdb.pdf.gz"
        gdb[2] = pre2 + "paper.pdf"
        gdb[3] = pre2 + "slides.pdf"
        print stringa
        print "GDB Documentation"
        print "GDB User Manual"
        filename = wget.download(gdb[1])
        print "\nThe Heisenberg Debugging Technology"
        print "Slides/Paper/Enter(for both)"
        decision = raw_input()
        if decision == "Paper":
            filename = wget.download(gdb[2])
        elif decision == "Slides":
            filename = wget.download(gdb[3])
        else:
            for key in range(2,4):
#                print key
                filename = wget.download(gdb[key])
            print "\nCompleted\n"
    except:
        print "\n Did something else happen ? \n"
Example #18
def get_sdss_dr7_frame(run, camcol, field, band, rerun=40):
    url, psfield_url, calib_url = \
        get_sdss_dr7_frame_url(run, camcol, field, band, rerun)

    # download files if necessary
    import wget
    if not os.path.exists(img_filename):
        img_filename = wget.download(url)
    if not os.path.exists(ps_filename):
        ps_filename  = wget.download(psfield_url)

    # load calibration data to get sky noise
    ps_data  = fitsio.FITS(ps_filename)[6].read()

    # create fitsfile
    img_data = fitsio.FITS(img_filename)[0].read()
    img_header = fitsio.read_header(img_filename)

    import CelestePy.fits_image as fits_image
    reload(fits_image)
    imgfits = fits_image.FitsImage(band,
                              timg=imgs[band],
                              calib=1.,
                              gain=gain,
                              darkvar=darkvar,
                              sky=0.)
Example #19
    def download_gif(self, term, slide_num):
        # If we have at least 3 local gifs, use one of those
        if (term in self.gifs) and (len(self.gifs[term]) > 3):
            return os.path.join("GIFs", "%s.gif" % random.choice(self.gifs[term]))

        try:
            # Download the gif
            img = translate(term)
            image_path = os.path.join(self.resources_dir, "%d.gif" % slide_num)
            wget.download(img.fixed_height.url, image_path)

            file_hasher = hashlib.md5()
            with open(image_path, "rb") as f:
                file_hasher.update(f.read())
            file_md5 = file_hasher.hexdigest()

            if not (term in self.gifs):
                self.gifs[term] = []

            if not (file_md5 in self.gifs[term]):
                self.gifs[term].append(file_md5)
                shutil.copy(image_path, os.path.join("GIFs", "%s.gif" % file_md5))
                with open(os.path.join("GIFs", "hashes.json"), "w") as f:
                    json.dump(self.gifs, f, indent=2)

            return image_path
        except:
            return None
Example #20
def get_webapi_brand_image_link_per_country_lang(csku, lang=None, directory=None):
    """ Accesses the Zalando Website API and pulls information for article brand, as well as a link
            for an article picture.
            :param csku: The csku name to pull data for
            :param lang: The country to access
            :type csku: str
            :type lang: str-str
            :return: The url of the csku picture, and the brand name of the csku
            :rtype: dictionary_object
            """
    try:
        web_request = \
            'https://api.zalando.com/articles/{c}?fields=media.images.largeHdUrl'.format(c=csku)
        webapi_brand_image_url = requests.get(web_request, headers={'x-client-name': 'Team AIS Preorder PQRS API'})
        result = json.loads(webapi_brand_image_url.text)

        # In case of 404 http error or any http error.
        # the result will be assigned here with the error message.
        # Then the default values are returned.
        if 'status' in result.keys():
            raise DataNotFound

        elif result is not None:
            # Get the brand


            if 'media' in result.keys() and 'images' in result['media'].keys():
                for x in result['media']['images']:
                    if 'largeHdUrl' in x.keys():
                        pic_url = x['largeHdUrl']
                        wget.download(pic_url, out=directory)


    except DataNotFound:
        pass
Example #21
def get_tles():


    # GetTLEs(): returns a list of tuples of kepler parameters for each satellite.
    resource = 'http://www.celestrak.com/norad/elements/resource.txt'
    weather = 'http://www.celestrak.com/norad/elements/weather.txt'
    try:
        os.remove('resource.txt')
    except OSError:
        pass
    try:
        os.remove('weather.txt')

    except OSError:
        pass
    wget.download(resource)
    wget.download(weather)
    file_names = ['weather.txt', 'resource.txt']
    with open('tles.txt', 'w') as outfile:
        for fname in file_names:
            with open(fname) as infile:
                for line in infile:
                    outfile.write(line)

    tles = open('tles.txt', 'r').readlines()

    print "retrieving TLE file.........."
    # strip off the header tokens and newlines
    tles = [item.strip() for item in tles]

    # clean up the lines
    tles = [(tles[i], tles[i+1], tles[i+2]) for i in xrange(0, len(tles)-2, 3)]

    return tles
Example #22
    def _download(self):
        if os.path.exists(self._target_file):
            if self.overwrite:
                log.info("Chose to overwrite old files.")
                self._clean()
            elif not self.verify():
                log.error("Previous download seems corrupted.")
                self._clean()
            else:
                log.info("Using previously downloaded %s" % self.filename)
                return self.filename
        elif not os.path.exists(self.directory):
            log.debug("Creating %s" % self.directory)
            os.mkdir(self.directory)

        try:
            for filename in [self.filename, self.filename + self.CHECKSUM_SUFFIX]:
                log.debug("Downloading %s" % filename)
                wget.download(self.base_url + filename, out=self.directory, bar=None)
            if self.verify():
                log.info(("Successfully downloaded: %s" % filename))
                return self._target_file
            else:
                return None
        except Exception as e:
            log.debug("Failed to download %s: %s" % (filename, e))
Example #23
    def doTask(self, tstamp):
        """Download image."""
        tstamp = coils.string2time(tstamp)
        fname = coils.time2fname(tstamp) + '.jpg'
        dest_dir = os.path.join(self._config['pics_dir'], coils.time2dir(tstamp))
        dest_fname = os.path.join(
            dest_dir,
            fname,
            )
        if os.path.exists(dest_fname):
            print('Skipping {}'.format(dest_fname))
            return    
        try:
            os.makedirs(dest_dir)
        except os.error:
            pass
        saved = os.getcwd()
        os.chdir(dest_dir)
        url = '{}/pics/{}.jpg'.format(
            self._url,
            coils.time2fname(tstamp, full=True),
            )
        print(url)
        wget.download(url, bar=None)
        os.chdir(saved)

        # Propagate timestamp downstream.
        return tstamp
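Since wget.download also accepts an explicit output path, the chdir/restore dance above could arguably be avoided; a one-line sketch under that assumption, reusing the url and dest_fname already computed in doTask:

# Sketch only: write straight to the destination path instead of changing
# the working directory (same wget API, using the out= keyword).
wget.download(url, out=dest_fname, bar=None)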
Example #24
def foo():
    fin=open(sys.argv[1],'r')
    for line in fin:
        a,b=line.strip().rstrip('\n').split(',')
        c=b.strip('"')+'_'+a.strip('"')+'.pdf'
        makeurl='http://www.tpcuiet.com/resume_upload/cannot_find_it_haha/{}'.format(c)
        wget.download(makeurl)
Example #25
def download(url):
	"""Copy the contents of a file from a given URL
	to a local file.
	"""
	
	wf = urllib2.urlopen(url)
	html=wf.read()
	# print html
	flist=[]

	mhdf = re.findall('\"M.*\.hdf\"', html)
	mhdfs =[f for f in mhdf if 'h08v04' in f or 'h08v05' in f or 'h09v04' in f]
	# print mhdfs
	for line in mhdfs:
		# print 'a line', line.replace('\"', '')
		fileUrl=url+line.replace('\"', '')
		print fileUrl
		wget.download(fileUrl)

	xhdf = re.findall('\"M.*\.hdf.xml\"', html)
	xhdfs =[f for f in xhdf if 'h08v04' in f or 'h08v05' in f or 'h09v04' in f]
	for line in xhdfs:
		# print 'a line', line.replace('\"', '')
		xfileUrl=url+line.replace('\"', '')
		print xfileUrl
		wget.download(xfileUrl)
Example #26
def download_img(url):
    text = requests.get(url).text
    soup = bs(text, "lxml")

    # total
    total = soup.find('span', {'style': 'color: #DB0909'}).text
    total = total[: -3]
    total = int(total)

    # title
    title = soup.find('h1', {'id': 'htilte'}).text

    url_pattern = soup.find('ul', {'id': 'hgallery'})
    url_pattern = url_pattern.img.get('src').replace('/0.jpg', '/{:03d}.jpg')
    print title
    if os.path.exists(title):
        return

    os.mkdir(title)
    for i in xrange(total):
        file_url = url_pattern.format(i)
        file_name = "{:03d}.jpg".format(i)
        output_file = os.path.join(title, file_name)
        if i == 0:
            file_url = file_url.replace("000", "0")
        wget.download(file_url, out=output_file)
Example #27
def download_files(answer, download_list):
    if answer == 'y' or answer == 'yes':
        for item in download_list:
            print item
            wget.download(download_list[item])
    else:
        print 'Thank you and have a really great day!'
Example #28
def update():
    print("Downloading Update")
    wget.download('<zip>', 'update.zip')

    try:
        shutil.rmtree(dir+'\config')
    except:
        print("Continuing")
    try:
        shutil.rmtree(dir+'\mods')
    except:
        print("Continuing")
    try:
        shutil.rmtree(dir+'\jarmods')
    except:
        print("Continuing")

    with zipfile.ZipFile('update.zip') as myzip:
        myzip.extractall(dir)
        myzip.close()

    os.remove('svn.txt')
    os.remove('update.zip')

    os.rename('svnnew.txt', 'svn.txt')
    print("Update Complete")
Example #29
def create_lisa_data_dir_tree(oseg=None):

    odp = op.expanduser('~/lisa_data/.lisa/')
    if not op.exists(odp):
        os.makedirs(odp)

    import wget
    lisa_icon_path= path(".lisa/LISA256.png")
    if not op.exists(lisa_icon_path):
        try:
            wget.download(
                "https://raw.githubusercontent.com/mjirik/lisa/master/lisa/icons/LISA256.png",
                out=lisa_icon_path)
        except:
            import traceback
            logger.warning('logo download failed')
            logger.warning(traceback.format_exc())

    if oseg is not None:
        # used for server sync
        oseg._output_datapath_from_server = op.join(oseg.output_datapath, 'sync', oseg.sftp_username, "from_server/")
        # used for server sync
        oseg._output_datapath_to_server = op.join(oseg.output_datapath, 'sync', oseg.sftp_username, "to_server/")
        odp = oseg.output_datapath
        if not op.exists(odp):
            os.makedirs(odp)
        odp = oseg._output_datapath_from_server
        if not op.exists(odp):
            os.makedirs(odp)
        odp = oseg._output_datapath_to_server
        if not op.exists(odp):
            os.makedirs(odp)
Example #30
def get_ipr_hierarchy():
	if not os.path.isfile('interpro.xml.gz'):
		print 'downloading interpro data'
		wget.download('ftp://ftp.ebi.ac.uk/pub/databases/interpro/Current/interpro.xml.gz')
	#if os.path.isfile('interpro.hierarchy.p'):
	#	with open('interpro.hierarchy.p','rU') as filehandle:
	#		ipr_hierarchy = pickle.load(filehandle)
	#	return ipr_hierarchy
	print 'preparing interpro data'
	ipr_hierarchy = IprHierarchy()
	with gzip.open('interpro.xml.gz','rb') as filehandle:
		#filecontent = filehandle.read()
		soup = BeautifulSoup(filehandle,'xml')
		for domain in soup.find_all('interpro'):
			name = str(domain.find('name').string)
			parents_list = []
			contains_list = []
			child_list = []
			found_in_list = []
			domain_features = get_domain_features(domain)
			ipr = IprObject(ID=domain['id'],name=name,domain_type=domain['type'],domain_features=domain_features)
			ipr_hierarchy.update(ipr)
	ipr_hierarchy.set_contained_by()
	#print ipr_hierarchy
	with open('interpro.hierarchy.p','w') as filehandle:
		pickle.dump(ipr_hierarchy,filehandle)
	return ipr_hierarchy
Example #31
#!/usr/bin/env python3
# coding: utf-8

import os.path as osp
from collections import OrderedDict

import gdown
import torch
import tvm
import wget
from tvm import relay

wget.download('https://raw.githubusercontent.com/kosuke55/train_baiducnn/master/scripts/pytorch/BCNN.py')  # noqa
from BCNN import BCNN


def fix_model_state_dict(state_dict):
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k
        if name.startswith('module.'):
            name = name[7:]  # remove 'module.' of dataparallel
        new_state_dict[name] = v
    return new_state_dict


current_dir = osp.dirname(osp.abspath(__file__))
data_dir = osp.join(current_dir, 'data')
pretrained_model = osp.join(data_dir, 'bestmodel.pt')
if not osp.exists(pretrained_model):
    print('Downloading %s' % pretrained_model)
Example #32
doc = open('/mnt/ddnfs/data_users/cxkttwl/ICL/BASS/err_stop_mzls.txt', 'w')

for kk in range(3):
    if kk == 0:
        lo = 180
    else:
        lo = 0
    for jj in range(lo, len(z)):
        ra_g = ra[jj]
        dec_g = dec[jj]
        z_g = z[jj]
        try:
            url_load = 'http://legacysurvey.org/viewer/fits-cutout?ra=%f&dec=%f&layer=dr8&pixscale=0.27&bands=%s&size=3000' % (
                ra_g, dec_g, band[kk])
            out_file = '/mnt/ddnfs/data_users/cxkttwl/ICL/BASS/mzls_img/mzls_img_ra%.3f_dec%.3f_z%.3f_%s_band.fits' % (
                ra_g, dec_g, z_g, band[kk])
            wt.download(url_load, out_file)

            print('**********-----')
            print('finish--', jj / len(z))
        except:
            s = '%s, %d, %.3f, %.3f, %.3f' % (band[kk], jj, ra_g, dec_g, z_g)
            print(
                s,
                file=doc,
            )

doc.close()

print('Done!')
Example #33
async def ytmusic(client, message: Message):
    global is_downloading
    if is_downloading:
        await message.reply_text("دانلود دیگری در جریان است بعدا تلاش کن")
        return

    urlissed = get_text(message)

    pablo = await client.send_message(message.chat.id,
                                      f"`{urlissed} در حال بارگیری`")
    if not urlissed:
        await pablo.edit("خطا")
        return

    search = SearchVideos(f"{urlissed}", offset=1, mode="dict", max_results=1)
    mi = search.result()
    mio = mi["search_result"]
    mo = mio[0]["link"]
    thum = mio[0]["title"]
    fridayz = mio[0]["id"]
    thums = mio[0]["channel"]
    kekme = f"https://img.youtube.com/vi/{fridayz}/hqdefault.jpg"
    await asyncio.sleep(0.6)
    url = mo
    sedlyf = wget.download(kekme)
    opts = {
        "format":
        "best",
        "addmetadata":
        True,
        "key":
        "FFmpegMetadata",
        "prefer_ffmpeg":
        True,
        "geo_bypass":
        True,
        "nocheckcertificate":
        True,
        "postprocessors": [{
            "key": "FFmpegVideoConvertor",
            "preferedformat": "mp4"
        }],
        "outtmpl":
        "%(id)s.mp4",
        "logtostderr":
        False,
        "quiet":
        True,
    }
    try:
        is_downloading = True
        with youtube_dl.YoutubeDL(opts) as ytdl:
            infoo = ytdl.extract_info(url, False)
            duration = round(infoo["duration"] / 60)

            if duration > 8:
                await pablo.edit(
                    f" دقیقه است{duration}ویدیو های بیشتر از 8دقیقه دانلود نمیشوند. این مورد"
                )
                is_downloading = False
                return
            ytdl_data = ytdl.extract_info(url, download=True)

    except Exception:
        # await pablo.edit(event, f"**Failed To Download** \n**Error :** `{str(e)}`")
        is_downloading = False
        return

    c_time = time.time()
    file_stark = f"{ytdl_data['id']}.mp4"
    capy = f"**عنوان ➠** `{thum}` **درخواست :** `{urlissed}` **چنل :** `{thums}` **لینک :** `{mo}`"
    await client.send_video(
        message.chat.id,
        video=open(file_stark, "rb"),
        duration=int(ytdl_data["duration"]),
        file_name=str(ytdl_data["title"]),
        thumb=sedlyf,
        caption=capy,
        supports_streaming=True,
        progress=progress,
        progress_args=(
            pablo,
            c_time,
            f"`از یوتیوب {urlissed} در حال آپلود",
            file_stark,
        ),
    )
    await pablo.delete()
    is_downloading = False
    for files in (sedlyf, file_stark):
        if files and os.path.exists(files):
            os.remove(files)
Example #34
Path(download_path).mkdir(parents=True, exist_ok=True)

prophet_analysis_path = "/AURN_prophet_analysis"
Path(prophet_analysis_path).mkdir(parents=True, exist_ok=True)

meta_data_url = "https://uk-air.defra.gov.uk/openair/R_data/AURN_metadata.RData"
data_url = "https://uk-air.defra.gov.uk/openair/R_data/"

# Do you want to check the sites listed in the meta data file?
# Does the metadatafile exist?
meata_data_filename = 'AURN_metadata.RData'
if os.path.isfile(meata_data_filename) is True:
    print("Meta data file already exists in this directory, will use this")
else:
    print("Downloading Meta data file")
    wget.download(meta_data_url)

# Read the RData file into a Pandas dataframe
metadata = pyreadr.read_r(meata_data_filename)

# In the following we now download the data. Here we have a number of options
# - Specify the years to download data for
# - Specify the local authority[ies] to download data for
# - Download data for all authorities

# Downloading site data for a specific year or years
years = [2015, 2016, 2017, 2018, 2019, 2020]
# If a single year is passed then convert to a list with a single value
if type(years) is int:
    years = [years]
current_year = datetime.datetime.now()
Example #35
            pass
        elif url[0] == "/":
            url = baseURL + url
        else:
            url = baseURL + "/" + url
        urllist.add(url)

for url in urllist:
    filename = url.split("/")[-1]
    if not os.path.isfile(filename.replace(" ", "%20")):
        time.sleep(1)
        if url[-3:] == "pdf":
            response = requests.get(url)
            if not response.status_code > 400:
                url = url.replace(" ", "%20")
                wget.download(url)
                print("Downloading: " + url)
            else:
                print("404 for this guy: " + url)
        elif url[-3:] == "html":
            print(url)
            response = requests.get(url)
            if not response.status_code > 400:
                url = url.replace(" ", "%20")
                wget.download(url)
                print("Downloading: " + url)
            else:
                print("404 for this guy: " + url)
        elif url[-1] == "/":
            p = re.compile("(crd\d{4})")
            m = p.search(url)
Example #36
async def ytmusic(client, message: Message):
    global is_downloading
    if is_downloading:
        await message.reply_text(
            "Another download is in progress, try again after sometime.")
        return

    urlissed = get_text(message)

    pablo = await client.send_message(
        message.chat.id,
        f"`Getting {urlissed} From Youtube Servers. Please Wait.`")
    if not urlissed:
        await pablo.edit(
            "Invalid Command Syntax, Please Check Help Menu To Know More!")
        return

    search = SearchVideos(f"{urlissed}", offset=1, mode="dict", max_results=1)
    mi = search.result()
    mio = mi["search_result"]
    mo = mio[0]["link"]
    thum = mio[0]["title"]
    fridayz = mio[0]["id"]
    thums = mio[0]["channel"]
    kekme = f"https://img.youtube.com/vi/{fridayz}/hqdefault.jpg"
    await asyncio.sleep(0.6)
    url = mo
    sedlyf = wget.download(kekme)
    opts = {
        "format":
        "best",
        "addmetadata":
        True,
        "key":
        "FFmpegMetadata",
        "prefer_ffmpeg":
        True,
        "geo_bypass":
        True,
        "nocheckcertificate":
        True,
        "postprocessors": [{
            "key": "FFmpegVideoConvertor",
            "preferedformat": "mp4"
        }],
        "outtmpl":
        "%(id)s.mp4",
        "logtostderr":
        False,
        "quiet":
        True,
    }
    try:
        is_downloading = True
        with youtube_dl.YoutubeDL(opts) as ytdl:
            infoo = ytdl.extract_info(url, False)
            duration = round(infoo["duration"] / 60)

            if duration > 8:
                await pablo.edit(
                    f"❌ Videos longer than 8 minute(s) aren't allowed, the provided video is {duration} minute(s)"
                )
                is_downloading = False
                return
            ytdl_data = ytdl.extract_info(url, download=True)

    except Exception as e:
        #await pablo.edit(event, f"**Failed To Download** \n**Error :** `{str(e)}`")
        is_downloading = False
        return

    c_time = time.time()
    file_stark = f"{ytdl_data['id']}.mp4"
    capy = f"**Video Name ➠** `{thum}` \n**Requested For :** `{urlissed}` \n**Channel :** `{thums}` \n**Link :** `{mo}`"
    await client.send_video(
        message.chat.id,
        video=open(file_stark, "rb"),
        duration=int(ytdl_data["duration"]),
        file_name=str(ytdl_data["title"]),
        thumb=sedlyf,
        caption=capy,
        supports_streaming=True,
        progress=progress,
        progress_args=(pablo, c_time,
                       f'`Uploading {urlissed} Song From YouTube Music!`',
                       file_stark))
    await pablo.delete()
    is_downloading = False
    for files in (sedlyf, file_stark):
        if files and os.path.exists(files):
            os.remove(files)
Example #37
import pandas as pd
import wget
from os import path
from time import strftime, sleep

# chk if file exist, if not download from LSE url
tmpfilename = 'tmp-' + str(strftime("%Y-%m")) + '-issues-IPOs.xlsx'
if path.exists('tmpdir/' + tmpfilename) == False:
    ## web UI = https://www.londonstockexchange.com/reports?tab=new-issues-and-ipos
    # https://docs.londonstockexchange.com/sites/default/files/reports/New%20issues%20and%20IPOs_1.xlsx
    url1 = 'https://docs.londonstockexchange.com/sites/default/files/reports/'
    filexlsx = 'New issues and IPOs_1.xlsx'
    wget.download(url1 + filexlsx, out='tmpdir/' + tmpfilename)
    sleep(2)

dataexcel = pd.read_excel(
    'tmpdir/' + tmpfilename,
    sheet_name='New Issues and IPOs',
    skiprows=range(6),
    usecols=['Company', 'Date', 'Issue Price', 'Currency'])
comp_list = dataexcel['Company'].tolist()
date_list = dataexcel['Date'].tolist()
price_list = dataexcel['Issue Price'].fillna(0).tolist()
currc_list = dataexcel['Currency'].replace('GBX',
                                           'GBP').fillna('PENCE').tolist()

company = [x for x in comp_list][::-1]
datejoin = [x.date() for x in date_list][::-1]
priceone = [x for x in price_list][::-1]
currency = [x for x in currc_list][::-1]
Example #38
    def __init__(self, config):
        wget.download(
            "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin",
            "model")

        self.model = fasttext.load_model("model")
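A brief usage sketch for the language-identification model downloaded above; the input sentence is arbitrary and the snippet assumes fasttext's standard predict API:

import fasttext

# lid.176 returns labels such as '__label__en' together with confidence scores.
model = fasttext.load_model("model")
labels, scores = model.predict("This is an English sentence.")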
Example #39
# get the image URL and download the picture
p_url_img = b.select('.series__img-wrap')
string = p_url_img[0]
print(p_url_img)
ser_img = re.findall(fnd_src, str(p_url_img))

# extract the episode title and date
p_tit = b.select('.series__item .series__item-title')
p_dat = b.select('.series__month-day .series__item-month')
# get the episode page address
p_block = b.select('.series__link-block')

for ser in range(ser_num):
    print(ser, ser_img[ser])
    filename = wget.download(ser_img[ser])
    os.rename(filename, u'' + os.getcwd() + '/' + filename)

    ser_title = p_tit[ser].getText()
    ser_dat = p_dat[ser].getText()
    print('Серия: ' + str(ser) + ' ' + ser_title + '  дата:' + ser_dat)
    result = re.findall(fnd_href, str(p_block[ser]))
    # fetch the episode page
    s1 = requests.get('http://www.tvc.ru' + result[0])
    p = bs4.BeautifulSoup(s1.text, "html.parser")

    # get the announcement text
    p_anons = p.select('.brand__anons-text')
    ser_anons = p_anons[0].getText()
    print(
        'Анонс: \n для ют. Православная энциклопедия ТВЦ \n для фб. #Православная_энциклопедия\n'
Example #40
import argparse
import os
import wget

# download dependencies
if not os.path.exists('data'):
    DIRNAME = os.path.dirname(os.path.abspath(__file__))
    DATA_DIR = os.path.join(DIRNAME, 'data')
    os.mkdir(DATA_DIR)
    ANNOTATIONS_TO_DOWNLOAD = [
        ('https://dl.fbaipublicfiles.com/qaoverlap/data/nq-annotations.jsonl','nq-annotations.jsonl'),
        ('https://dl.fbaipublicfiles.com/qaoverlap/data/triviaqa-annotations.jsonl', 'triviaqa-annotations.jsonl'),
        ('https://dl.fbaipublicfiles.com/qaoverlap/data/webquestions-annotations.jsonl','webquestions-annotations.jsonl')
    ]

    for link, dest in ANNOTATIONS_TO_DOWNLOAD:
        wget.download(link, os.path.join(DATA_DIR, dest))


ANNOTATIONS = [
    'total',
    'question_overlap',
    'no_question_overlap',
    'answer_overlap',
    'no_answer_overlap',
    'answer_overlap_only'
]

DIRNAME = os.path.dirname(os.path.abspath(__file__))

ANNOTATION_PATHS = {
    'triviaqa': os.path.join(DIRNAME, 'data/triviaqa-annotations.jsonl'),
Example #41
        print('Его нельзя: ' + link)
    elif link.find('youtube') != -1:
        print('Его нельзя: ' + link)
    elif link.find('text-lyrics.ru') != -1:
        print('Яма: ' + link)
    else:
        print(link)
        response = requests.get(link)
        soup = BeautifulSoup(response.text,
                             'html.parser').find('div', class_='download')
        print(soup)
        if soup != None:
            soup = soup.__str__()
            for i in BeautifulSoup(soup, 'html.parser').find_all('a',
                                                                 href=True):
                wget.download(i['href'], 'Oxxymiron_where_test.mp3')
                audio = MP3("Oxxymiron_where_test.mp3")
                print("Track: " + audio.get("TIT2").text[0])

                #try:print("Text: " + audio.get("USLT"))
                #except AttributeError: print('Нет текста')
                print('Lenght: ' + str(audio.info.length))
                print('Info: ' + audio.info.pprint())

                audio2 = MP3("Oxxymiron_where.mp3")
                if audio2.get("TIT2") == audio.get(
                        "TIT2"
                ) and audio2.info.length == audio.info.length and audio2.info.pprint(
                ) == audio.info.pprint():
                    print("Это подлинный")
                else:
Example #42
def download(zip_path, path, to_download=True):
    # NOTE: as written this downloads the module-level `url` into `path` and then
    # extracts the archive there; `zip_path` and `to_download` are unused here.
    with ZipFile(wget.download(url, path), 'r') as zip_ref:
        zip_ref.extractall(path)
Example #43
# Where did you save your output from the Cross Validation fits?
#prophet_analysis_path = "/AURN_prophet_analysis"

prophet_analysis_path= 'C:/Users/Dave/Documents/Code/AURN_prophet_analysis'
#Path(prophet_analysis_path).mkdir(parents=True, exist_ok=True)

meta_data_url = "https://uk-air.defra.gov.uk/openair/R_data/AURN_metadata.RData"
data_url = "https://uk-air.defra.gov.uk/openair/R_data/"
# Do you want to check the sites listed in the meta data file?
# Does the metadatafile exist?
meata_data_filename = 'AURN_metadata.RData'
if os.path.isfile(meata_data_filename) is True:
    print("Meta data file already exists in this directory, will use this")
else:
    print("Downloading Meta data file")
    wget.download(meta_data_url)

# Read the RData file into a Pandas dataframe
metadata = pyreadr.read_r(meata_data_filename)

# We do not download any AURN data in this script, since this will have taken
# place during the Prophet fitting process
# Nonetheless we can still choose to look at individual authorities

manual_selection = True
save_to_csv = False
site_data_dict=dict()
site_data_dict_name=dict()
diurnal_plot = True

if manual_selection is True:
Example #44
def main(argv):
    global logger, resultsFile, requiresAuth, user, pwd, queryListTxt, queries, runs, outliers, warmups, parallell, endpoint
    
    configfile = False
    config = False
    # Read Config file
    try:
        opts, args = getopt.getopt(argv,"hc:",["config="])
    except getopt.GetoptError:
        print ('test.py -c <configfile>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print ('test.py -c <configfile>')
            sys.exit()
        elif opt in ("-c", "--config"):
            configfile = arg

    if not configfile:
        print ('No config file given, usage:')
        print ('test.py -c <configfile>')
        sys.exit(2)
    else:
        print ('Config file is "', configfile)
        config = yaml.safe_load(open(configfile))
        
    if not config or config is None:
        print ('Invalid config file given, try again or check the path to the config file, usage:')
        print ('test.py -c <configfile>')
        sys.exit(2)
    else:
        print ('Loaded config')
        print (config)
        
    
    ##Set Benchmarker Parameters
    
    #Log file
    logger = logging.getLogger('Logger')
    fhandler = logging.FileHandler(filename='output.log', mode='a')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fhandler.setFormatter(formatter)
    logger.addHandler(fhandler)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False

    #Store under test
    store = config['storeUnderTest']

    #Runner
    url = "http://downloads.sourceforge.net/project/sparql-query-bm/2.1.0/sparql-query-bm-distribution.tar.gz"
    fname = "sparql-query-bm-distribution.tar.gz"
    dirname = "sparql-query-bm-2.1.0"
    totalRuns = config['totalRuns'] #1 immediate, one after fixed time, one control run after same time interval
    timeInterval = config['timeInterval'] #in seconds
    checkDataLoaded = config['check_data_loaded']

    #Target Endpoint
    endpoint = config['endpoint']
    sparqlEndpoint = SPARQLWrapper(endpoint)
    requiresAuth = config['requiresAuth']

    user = config['user']
    pwd = config['pwd']

    if requiresAuth:
        sparqlEndpoint.setCredentials(user, pwd)

    #Dataset
    firstTriple = config['firstTriple']

    lastTriple = config['lastTriple']

    #Queries
    queryListName = config['queryListName']
    queryList = config['queryListSource'] + queryListName 
    queryListTxt = queryListName[:-3]+"txt"
    queriesName = config['queriesName']
    queries = config['queryListSource'] + queriesName + ".tar.gz"

    #Benchmark
    runs = config['runs']
    outliers = config['outliers']
    warmups = config['warmups']
    parallell = config['parallell']
    resultsFile = "results_"+store
    
    
    #Install if necessary the Benchmarker Software
    if not os.path.isfile(fname):
        print ("Downloading: " +  str(fname))
        wget.download(url)
    if not os.path.isdir(dirname):
        print ("Untarring to dir: " + str(dirname))
        untar(fname)
    else:
        print ("SPARQL Benchmarker already present in dir: " + str(dirname))
        
        
    #Retrieve the queries
    if not os.path.isfile(queryListName):
        print ("Downloading: " +str(queryListName))

        print("wget: " + str(queryList))
        wget.download(queryList)
    else:
        print ("Query list already present: " + str(queryListName))

    if not os.path.isfile(queryListTxt):
        shutil.copyfile(queryListName, queryListTxt)

    if not os.path.isfile(queriesName + ".tar.gz"):
        print ("Downloading: " + str(queriesName))
        wget.download(queries)
    if not os.path.isdir(queriesName):
        print ("Untarring to dir: " + str(queriesName))
        extract(str(queriesName) + ".tar.gz")
    else:
        print ("Queries already present in dir: " + str(queriesName))    
    
    
    #Test if the endpoint is up and data is loaded and execute run
    time.sleep(30)

    if checkDataLoaded:
        while ( not hasTriple(sparqlEndpoint, firstTriple) ) or (not hasTriple(sparqlEndpoint, lastTriple) ):
            logger.info("This polls once a minute until data is loaded.")
            time.sleep(60)
    
    logger.info("All data is loaded.")
    print("All data is loaded.")

    logger.info("Running Queries")
    print("Running Queries")

    for x in range(0, totalRuns):
        logger.info("Run %s of %s" % (x, totalRuns))
        print("Run %s of %s" % (x, totalRuns))

        rProc = run(x)
        trace(rProc)

        logger.info("Waiting %s for next run" % (timeInterval))
        print("Waiting %s for next run" % (timeInterval))

        time.sleep(timeInterval)   
Example #45
def get_nvd_feed():
    url = 'https://nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-recent.json.zip'  # NVD Feed URL
    wget.download(url)
    command = 'unzip -o nvdcve-1.0-recent.json.zip'  # Unzip json.gz file
    os.system(command)
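The shell call to unzip could equally be replaced with the standard-library zipfile module; a minimal sketch of that variant, using the same filename as above:

import zipfile

# Extract the downloaded NVD feed in-process instead of shelling out to unzip.
with zipfile.ZipFile('nvdcve-1.0-recent.json.zip') as zf:
    zf.extractall()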
Example #46
imgplot.set_cmap('gray')

image = Image.fromarray(
    tile_raster_images(X=W.numpy().T[10:11],
                       img_shape=(28, 28),
                       tile_shape=(1, 1),
                       tile_spacing=(1, 1)))
### Plot image
plt.rcParams['figure.figsize'] = (4.0, 4.0)
imgplot = plt.imshow(image)
imgplot.set_cmap('gray')

#>>>>>>>>>>>>>>>>>>Evaluation<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
if os.path.isfile('destructed3.jpg') is False:
    url = 'https://ibm.box.com/shared/static/vvm1b63uvuxq88vbw9znpwu5ol380mco.jpg'
    wget.download(url, 'destructed3.jpg')
img = Image.open('destructed3.jpg')

# convert the image to a 1d numpy array
sample_case = np.array(img.convert('I').resize((28, 28))).ravel().reshape(
    (1, -1)) / 255.0

sample_case = tf.cast(sample_case, dtype=tf.float32)

hh0_p = tf.nn.sigmoid(tf.matmul(sample_case, W) + hb)
hh0_s = tf.round(hh0_p)

print("Probability nodes in hidden layer:", hh0_p)
print("activated nodes in hidden layer:", hh0_s)

# reconstruct
Example #47
# Setup folders
print("Setting up install directories at /opt/caai")
if not os.path.exists('/opt/caai'):
    Path('/opt/caai').mkdir(parents=True, exist_ok=True)
if not os.path.exists('/opt/caai/share'):
    Path('/opt/caai/share').mkdir(parents=True, exist_ok=True)
if not os.path.exists('/opt/caai/bin'):
    Path('/opt/caai/bin').mkdir(parents=True, exist_ok=True)
if not os.path.exists('/opt/caai/rhscripts'):
    Path('/opt/caai/rhscripts').mkdir(parents=True, exist_ok=True)

# Download models
if not os.path.exists('models.zip'):
    print('Downloading models for DeepMRAC')
    url = "http://resolute.pet.rh.dk:8000/models_01sep2020.zip"
    wget.download(url, 'models.zip')
    print("")

# Unzip models
if not os.path.exists('/opt/caai/share/DeepMRAC'):
    print("Extracting models")
    with zipfile.ZipFile('models.zip', 'r') as zip_ref:
        zip_ref.extractall('/opt/caai/share/DeepMRAC')

# Install scripts
print("Installing run scripts")
copyfile('scripts/process_DeepDixon_dicom.py',
         '/opt/caai/bin/process_DeepDixon_dicom.py')
copymode('scripts/process_DeepDixon_dicom.py',
         '/opt/caai/bin/process_DeepDixon_dicom.py')
Example #48
# downloads the .bz2 file from the internet

import wget, os
if not os.path.isfile('first_actual_attempt/2015-12.bz2'):
    wget.download('https://files.pushshift.io/reddit/comments/RC_2005-12.bz2',
                  'first_actual_attempt/2015-12.bz2')
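The downloaded archive can then be read directly with the standard-library bz2 module; a short sketch, assuming the local filename used above:

import bz2

# Stream the compressed Reddit comment dump line by line without
# decompressing it to disk first.
with bz2.open('first_actual_attempt/2015-12.bz2', 'rt') as fh:
    first_line = next(fh)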
Example #49
import pandas as pd
import os
import wget
import zipfile
import glob

file = '/home/UA/jschroder/Downloads/ned_111.csv'
df = pd.read_csv(file,index_col=0)

DL_NED1 = '/workspace/Shared/Users/jschroder/TMP/DL_YQ'
if not os.path.exists(DL_NED1): os.makedirs(DL_NED1)

for i,k in zip(df.downloadURL , range(1,len(df.downloadURL)+1)) :
    print 'downloading %s out of %s' %(k , len(df.downloadURL))
    wget.download(i,out=DL_NED1)
    

for j in os.listdir(DL_NED1):
    zfile = zipfile.ZipFile(os.path.join(DL_NED1,j))
    zfile.extractall(DL_NED1)
a = DL_NED1

ls = [ os.path.join(a,i) for i in glob.glob(os.path.join(a,'*.img'))    ]
tiles = ' '.join(map(str,ls))
full = os.path.join(a,'full.img')
full2 = os.path.join(a,'full3.img')
fulltiff =os.path.join(a,'full3.tif')
Example #50

if __name__ == '__main__':
    # check whether a new version is available; exit if there is none
    ver_url = 'http://192.168.4.6/deploy/live_ver'
    ver_fname = '/var/www/deploy/live_ver'
    if not has_new_ver(ver_url, ver_fname):
        print('未发现新版本。')
        exit(1)

    # download the new version package
    r = requests.get(ver_url)
    ver_num = r.text.strip()  # strip the trailing \n
    down_dir = '/var/www/download'
    app_url = 'http://192.168.4.6/deploy/pkgs/mysite-%s.tar.gz' % ver_num
    wget.download(app_url, down_dir)

    # verify the downloaded file; if it is corrupted, delete it and exit
    app_fname = app_url.split('/')[-1]
    app_fname = os.path.join(down_dir, app_fname)
    md5_url = app_url + '.md5'
    if not file_ok(app_fname, md5_url):
        os.remove(app_fname)
        print('文件已损坏')
        exit(2)

    # deploy the new version
    deploy_dir = '/var/www/deploy'
    dest = '/var/www/html/nsd1907'
    deploy(app_fname, deploy_dir, dest)
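The file_ok helper used above is not shown; a hypothetical sketch of an MD5 check against the published .md5 file, assuming that file contains the hex digest as its first whitespace-separated field:

import hashlib
import requests

def file_ok(local_fname, md5_url):
    # Hypothetical reconstruction: compare the local file's MD5 digest with the
    # digest published alongside the package.
    expected = requests.get(md5_url).text.split()[0]
    h = hashlib.md5()
    with open(local_fname, 'rb') as fh:
        for chunk in iter(lambda: fh.read(4096), b''):
            h.update(chunk)
    return h.hexdigest() == expected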
Example #51
#!/usr/bin/env python3

import wget


url = 'https://static.alta3.com/images/python/csv_users.txt'
wget.download(url, '/c/SDE-JS/Python/mycode/credmaker/csv_users.txt')

outFile = open("admin.rc", "a")
osAUTH = input("What is the OS_AUTH_URL?")
print("export OS_AUTH_URL=" + osAUTH, file=outFile)

print("export OS_IDENTITY_API_VERSION=3", file=outFile)

osPROJ = input("What is the OS_PROJECT_NAME?")
print("export OS_PROJECT_NAME=" + osPROJ, file=outFile)

osPROJDOM = input("What is the OS_PROJECT_DOMAIN_NAME?")
print("export OS_PROJECT_DOMAIN_NAME=" + osPROJDOM, file=outFile)

osUSER = input("What is the OS_USERNAME?")
print("export OS_USERNAME="******"What is the OS_USER_DOMAIN_NAME?")
print("export OS_USER_DOMAIN_NAME=" + osUSERDOM, file=outFile)

osPASS = input("What is the OS_PASSWORD?")
print("export OS_PASSWORD=" + osPASS, file=outFile)
outFile.close()
Example #52
import os
import requests
import wget

n = 2
os.makedirs("images", exist_ok=True)  # make sure the target folder exists
for i in range(n):
    API_URL = 'https://dog.ceo/api/breeds/image/random'
    #API_KEY = 'i0cgsdYL3hpeOGkoGmA2TxzJ8LbbU1HpbkZo8B3kFG2bRKjx3V'
    #headers = {'UserAPI-Key': API_KEY}
    response = requests.get('{}'.format(API_URL))
    data = response.json()
    print(str(i) + " " + data['status'] + "-" + data['message'])
    if data['status'] == "success":
        fileurl = data['message']
        urlsplit = fileurl.split('/')
        breed = urlsplit[4]
        fname = urlsplit[5]
        newfilename = "images/" + breed + "---" + fname
        #        print(breed+"---"+fname)
        #        print(newfilename)
        file_name = wget.download(fileurl)  # saved into the current directory
        os.rename(file_name, newfilename)   # move under images/ with a breed prefix
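Since wget.download() accepts an explicit output path (as several other examples on this page show), the download-then-rename step can be collapsed; a sketch of the last two lines of the loop:

        # download straight to the final location, no rename needed
        file_name = wget.download(fileurl, out=newfilename)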
Ejemplo n.º 53
0
async def ytmusic(client, message: Message):
    global is_downloading
    if is_downloading:
        await message.reply_text(
            "Downloadan yang lain sedang berlangsung, coba lagi nanti")
        return

    urlissed = get_text(message)

    pablo = await client.send_message(
        message.chat.id,
        f"`Mendapatkan {urlissed} Dari Youtube. Tunggu Sebentar.`")
    if not urlissed:
        await pablo.edit(
            "Sintaks Perintah Tidak Valid, Silakan Periksa Menu Help Untuk Mengetahui Lebih Lanjut!"
        )
        return

    search = SearchVideos(f"{urlissed}", offset=1, mode="dict", max_results=1)
    mi = search.result()
    mio = mi["search_result"]
    mo = mio[0]["link"]
    thum = mio[0]["title"]
    fridayz = mio[0]["id"]
    thums = mio[0]["channel"]
    kekme = f"https://img.youtube.com/vi/{fridayz}/hqdefault.jpg"
    await asyncio.sleep(0.6)
    url = mo
    sedlyf = wget.download(kekme)
    opts = {
        "format": "best",
        "addmetadata": True,
        "key": "FFmpegMetadata",
        "prefer_ffmpeg": True,
        "geo_bypass": True,
        "nocheckcertificate": True,
        "postprocessors": [{
            "key": "FFmpegVideoConvertor",
            "preferedformat": "mp4"
        }],
        "outtmpl": "%(id)s.mp4",
        "logtostderr": False,
        "quiet": True,
    }
    try:
        is_downloading = True
        with youtube_dl.YoutubeDL(opts) as ytdl:
            infoo = ytdl.extract_info(url, False)
            duration = round(infoo["duration"] / 60)

            if duration > 10:
                await pablo.edit(
                    f"❌ Video berdurasi lebih dari 10 menit tidak diperbolehkan, video yang disediakan diperbolehkan {duration} minute(s)"
                )
                is_downloading = False
                return
            ytdl_data = ytdl.extract_info(url, download=True)

    except Exception as e:
        await pablo.edit(f"**Failed To Download** \n**Error :** `{str(e)}`")
        is_downloading = False
        return

    c_time = time.time()
    file_stark = f"{ytdl_data['id']}.mp4"
    capy = f"**Nama Video ➠** `{thum}` \n**Requested For :** `{urlissed}` \n**Channel :** `{thums}` \n**Link :** `{mo}`"
    await client.send_video(
        message.chat.id,
        video=open(file_stark, "rb"),
        duration=int(ytdl_data["duration"]),
        file_name=str(ytdl_data["title"]),
        thumb=sedlyf,
        caption=capy,
        supports_streaming=True,
        progress=progress,
        progress_args=(pablo, c_time,
                       f'`Uploading {urlissed} Song From YouTube Music!`',
                       file_stark))
    await pablo.delete()
    is_downloading = False
    for files in (sedlyf, file_stark):
        if files and os.path.exists(files):
            os.remove(files)
Ejemplo n.º 54
0
async def mudapk(client, message):
    pablo = await edit_or_reply(message, "`Searching For Mod App.....`")
    sgname = get_text(message)
    if not sgname:
        await pablo.edit(
            "`Please Give Me A Valid Input. You Can Check Help Menu To Know More!`"
        )
        return
    PabloEscobar = (
        f"https://an1.com/tags/MOD/?story={sgname}&do=search&subaction=search")
    r = requests.get(PabloEscobar)
    soup = BeautifulSoup(r.content, "html5lib")
    mydivs = soup.find_all("div", {"class": "search-results"})
    Pop = soup.find_all("div", {"class": "title"})
    try:
        sucker = mydivs[0]
    except IndexError:
        await pablo.edit("**404 Mod App Not Found!")
        return
    pH9 = sucker.find("a").contents[0]
    file_name = pH9

    pH = sucker.findAll("img")
    imme = wget.download(pH[0]["src"])
    Pablo = Pop[0].a["href"]

    ro = requests.get(Pablo)
    soup = BeautifulSoup(ro.content, "html5lib")

    mydis = soup.find_all("a", {"class": "get-product"})

    Lol = mydis[0]

    lemk = "https://an1.com" + Lol["href"]
    print(lemk)
    rr = requests.get(lemk)
    soup = BeautifulSoup(rr.content, "html5lib")

    script = soup.find("script", type="text/javascript")

    leek = re.search(r'href=[\'"]?([^\'" >]+)', script.text).group()
    dl_link = leek[5:]
    r = requests.get(dl_link)
    await pablo.edit("Downloading Mod App")
    ca = f"**App Name :** `{file_name}` \n\n**Uploaded Using @cpbotOT**"
    open(f"{file_name}.apk", "wb").write(r.content)
    c_time = time.time()
    await pablo.edit(f"`Downloaded {file_name}! Now Uploading APK...`")
    await client.send_document(
        message.chat.id,
        document=open(f"{file_name}.apk", "rb"),
        thumb=imme,
        caption=ca,
        progress=progress,
        progress_args=(
            pablo,
            c_time,
            f"`Uploading {file_name} Mod App`",
            f"{file_name}.apk",
        ),
    )
    os.remove(f"{file_name}.apk")
    os.remove(imme)
    await pablo.delete()
Ejemplo n.º 55
0
def phibase_mapping(base):

    phi_base_blast_raw_df = pd.read_csv(
        f"{base}/phibase/phibase_blast_raw.out", sep="\t", header=None)
    phi_base_blast_mapping_df = pd.read_csv(
        f"{base}/phibase/f_culmorum_phi_mapping.txt", sep="\t")
    phi_base_blast_mapping_df.columns = ['Gene', 'Protein ID']
    phi_fn = wget.download(
        'https://raw.githubusercontent.com/PHI-base/data/master/releases/phi-base_current.csv',
        out=f"{base}/phibase/")

    phi_df = pd.read_csv(phi_fn, sep=",")
    # Remove unnamed columns
    phi_df.drop(phi_df.columns[phi_df.columns.str.contains('unnamed',
                                                           case=False)],
                axis=1,
                inplace=True)
    col_names = phi_df.columns
    col_names_list = list(col_names)
    phenotype_names = [x for x in col_names_list if 'phenotype' in x.lower()]

    phi_series = phi_df['Pathogen species'].str.upper()
    fusarium_index = phi_series[phi_series.str.contains(
        "FUSARIUM")].index  # 26 instances of Fusarium

    fusarium_phi = phi_df[phi_df.index.isin(
        fusarium_index)]  # fusarium specific Phi results
    updated_fusarium_df = fusarium_phi[[
        'Gene', 'Gene ID', 'Protein ID', 'Host species', 'Pathogen species',
        'Disease', 'Mutant Phenotype'
    ]]
    updated_fusarium_df.reset_index(inplace=True)
    del updated_fusarium_df['index']

    disease_df = phibase_aggregate(updated_fusarium_df, 'Disease')
    disease_df.to_csv(f"{base}/phibase/fusarium-phibase-disease.txt",
                      sep="\t",
                      index=None)
    phenotype_df = phibase_aggregate(updated_fusarium_df, 'Mutant Phenotype')
    phenotype_df.to_csv(f"{base}/phibase/fusarium-phibase-phenotype.txt",
                        sep="\t",
                        index=None)

    gene_mapping_fusarium_phi_df = fusarium_phi[[
        'Gene', 'Gene ID', 'Protein ID'
    ]]
    gene_mapping_fusarium_phi_df.columns = [
        'Gene name', 'Gene ID', 'Protein ID'
    ]

    phi_base_blast_mapping_df = phi_base_blast_mapping_df.drop_duplicates(
        subset='Gene', keep='first')

    gene_name_phibase_merged = pd.merge(phi_base_blast_mapping_df,
                                        gene_mapping_fusarium_phi_df,
                                        on="Protein ID",
                                        how='inner')
    gene_name_phibase_merged = gene_name_phibase_merged[['Gene', 'Gene name']]
    gene_name_phibase_merged = gene_name_phibase_merged.drop_duplicates(
        subset='Gene name', keep='first')

    updated_gene_names = []
    for name in gene_name_phibase_merged['Gene name']:
        if name.startswith("("):
            updated_gene_names.append(name.replace("(", "").replace(")", ""))
        elif "(" in name:
            updated_gene_names.append(name.split('(', 1)[0])
        else:
            updated_gene_names.append(name)

    gene_name_phibase_merged['Gene name'] = updated_gene_names
    gene_name_phibase_merged['Gene'] = gene_name_phibase_merged[
        'Gene'].str.replace("T", "G")
    gene_name_phibase_merged.to_csv(
        f"{base}/phibase/fusarium-phi-gene-mapping.txt", sep="\t", index=None)

    phi_base_blast_raw_df = phi_base_blast_raw_df.drop_duplicates(subset=0,
                                                                  keep='first')
    phi_base_blast_raw_df.to_csv(f"{base}/phibase/phibase-blast-filtered.txt",
                                 sep="\t",
                                 index=None)
Ejemplo n.º 56
0
import os
os.getcwd()
import time
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
driver = webdriver.Chrome(
    executable_path='/home/jerry/Documents/Develop/chromedriver')
import wget
import urllib.request
url = 'https://www.google.com/search?q=%EB%90%9C%EC%9E%A5%EC%B0%8C%EA%B2%8C&tbm=isch&ved=2ahUKEwie1q_0xczsAhUG6ZQKHYlzDmQQ2-cCegQIABAA&oq=%EB%90%9C%EC%9E%A5%EC%B0%8C%EA%B2%8C&gs_lcp=CgNpbWcQAzICCAAyBggAEAoQGDIGCAAQChAYMgYIABAKEBgyBAgAEBgyBggAEAoQGDIGCAAQChAYMgYIABAKEBhQwaQcWJupHGC1rBxoAXAAeACAAcIBiAHKA5IBAzAuM5gBAKABAaoBC2d3cy13aXotaW1nwAEB&sclient=img&ei=PMKTX57ID4bS0wSJ57mgBg&bih=936&biw=916'
driver.get(url=url)

#elements=driver.find_elements(By.XPATH, '//img[@class="search-product-wrap-img"]')
elements = driver.find_elements(By.XPATH, '//img[@class="rg_i Q4LuWd"]')
down_path = '/home/jerry/Documents/Develop/pictures/'
for element in elements:
    src = element.get_attribute('src')
    if not src or src.startswith('data:'):  # thumbnails are often inlined as base64 data URIs
        continue
    #img_txt = src.split('/')[-1]
    image_name = down_path  #+img_txt
    wget.download(url=src, out=image_name)  # download each image into the pictures folder

#islrg > div.islrc > div:nth-child(4) > a.wXeWr.islib.nfEiy.mM5pbd > div.bRMDJf.islir > img

# Korean language support
# works nicely, well done
Ejemplo n.º 57
0
            print("Directory " + folderName + " already exists")

        if not os.path.exists(fileFolderName):
            os.mkdir(fileFolderName)
            print("Directory " + fileFolderName + " Created ")
        else:
            print("Directory " + fileFolderName + " already exists")

        i = 0
        for doc_id in doc_id_list:
            url_final = url_descarga_1 + doc_id + url_descarga_2
            file_path = os.path.join(
                fileFolderName, date_name + "&" + number_list[i] + "&" + entry_number_list[i])
            print("\n" + file_path)

            file_extension = utils.get_extension(url_final)
            file_complete_path = file_path + "." + file_extension

            if os.path.exists(file_complete_path):
                os.remove(file_complete_path)

            file_name = wget.download(url_final, file_complete_path)
            if (file_extension != "pdf"):
                utils.convert_to_pdf(file_name)
            i = i + 1

input("\nPress any key to close")



Ejemplo n.º 58
0
print("MODEL_URL:   " + MODEL_URL)
assert MODEL_URL is not None

# Get an unique ID
ID = str(uuid.uuid4())

# Create an empty dir
MODEL_FOLDER = join('models', ID)
if exists(MODEL_FOLDER):
  shutil.rmtree(MODEL_FOLDER)
makedirs(MODEL_FOLDER)
print("MODEL_FOLDER: " + MODEL_FOLDER)

print("Downloading...")
filename = wget.download(MODEL_URL, MODEL_FOLDER)
print("FILENAME: " + filename)
print("OK")

print("Extracting...")
dataset_zip = zipfile.ZipFile(filename)
dataset_zip.extractall(MODEL_FOLDER)
dataset_zip.close()
remove(filename)
print("OK")

MODEL_PATH = None
LABELS_PATH = None
TEXT_PATH = None 
for file in listdir(MODEL_FOLDER):
  if file.endswith(".pt") or file.endswith(".pth") or file.endswith(".weights"):
input_manager.start()

QueueManager.register('get_output_queue', callable=lambda: output_queue)
output_manager = QueueManager(address=(HOST, OUTPUT_PORT), authkey=OUTPUT_AUTH)
output_manager.start()

model_file = 'efficientnet-edgetpu-M_quant_edgetpu.tflite'
model_name = 'efficientnet-edgetpu-M'
model_path = 'data/models/' + model_name + '/' + model_file

base_url = 'https://raw.githubusercontent.com/neuralet/neuralet-models/master/edge-tpu/'
url = base_url + model_name + '/' + model_file

if not os.path.isfile(model_path):
    print('model does not exist, downloading from ', url)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)  # wget needs the target directory to exist
    wget.download(url, model_path)


def main():

    interpreter = Interpreter(
        model_path, experimental_delegates=[load_delegate("libedgetpu.so.1")])

    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    input_queue = input_manager.get_input_queue()
    output_queue = output_manager.get_output_queue()

    print(
Ejemplo n.º 60
0
def retrieve_fasta_files():

    # Fusarium oxysporum
    fn_f_oxy_ensembl = wget.download(
        'ftp://ftp.ensemblgenomes.org/pub/fungi/release-47/fasta/fusarium_oxysporum/pep/Fusarium_oxysporum.FO2.pep.all.fa.gz',
        f'{base}/ensembl/fusarium_oxysporum.pep.fa.gz')
    fn_f_oxy_ensembl = unzip_tidy(fn_f_oxy_ensembl, f'{base}/ensembl/')
    # Read fasta file then filter it
    with open(f'{fn_f_oxy_ensembl}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_f_oxy_ensembl), handle, "fasta")

    # A nidulans agdb
    fn_f_nidulans_agdb = wget.download(
        'http://www.aspergillusgenome.org/download/sequence/A_nidulans_FGSC_A4/current/A_nidulans_FGSC_A4_current_orf_coding.fasta.gz',
        f'{base}/agdb/a_nidulans_orf.fa.gz')
    fn_f_nidulans_agdb = unzip_tidy(fn_f_nidulans_agdb, '')

    with open(f'{fn_f_nidulans_agdb}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_f_nidulans_agdb), handle, "fasta")

    # A nidulans Ensembl
    fn_a_nid_ensembl = wget.download(
        'ftp://ftp.ensemblgenomes.org/pub/fungi/release-47/fasta/aspergillus_nidulans/pep/Aspergillus_nidulans.ASM1142v1.pep.all.fa.gz',
        f'{base}/ensembl/a_nidulans.pep.fa.gz')
    fn_a_nid_ensembl = unzip_tidy(fn_a_nid_ensembl, '')
    with open(f'{fn_a_nid_ensembl}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_a_nid_ensembl), handle, "fasta")

    # A nidulans UniProt
    fn_a_nidulans_uniprot = wget.download(
        'https://www.uniprot.org/uniprot/?query=reviewed:yes%20taxonomy:162425&format=fasta&force=true',
        f'{base}/uniprot/a_nidulans_uniprot.fa')
    with open(fn_a_nidulans_uniprot, "r+") as handle:
        SeqIO.write(yield_records(fn_a_nidulans_uniprot, '|'), handle, "fasta")

    # Fusarium graminearum
    fn_gram_ensembl = wget.download(
        'ftp://ftp.ensemblgenomes.org/pub/fungi/release-47/fasta/fusarium_graminearum/pep/Fusarium_graminearum.RR1.pep.all.fa.gz',
        f'{base}/ensembl/fusarium_graminearum.pep.fa.gz')
    fn_gram_ensembl = unzip_tidy(fn_gram_ensembl, f'{base}/ensembl/')
    with open(f'{fn_gram_ensembl}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_gram_ensembl), handle, "fasta")

    # Fusarium gram UniProt
    fn_gram_uniprot = wget.download(
        'https://www.uniprot.org/uniprot/?query=reviewed:yes%20taxonomy:5506&format=fasta&force=true',
        f'{base}/uniprot/fusarium_gram_uniprot.fa')
    with open(fn_gram_uniprot, "r+") as handle:
        SeqIO.write(yield_records(fn_gram_uniprot, '|'), handle, "fasta")

    # Fusarium lang Ensembl
    fn_lang_ensembl = wget.download(
        'ftp://ftp.ensemblgenomes.org/pub/release-47/fungi/fasta/fungi_ascomycota3_collection/fusarium_langsethiae_gca_001292635/pep/Fusarium_langsethiae_gca_001292635.ASM129263v1.pep.all.fa.gz',
        f'{base}/ensembl/fusarium_lang_uniprot.fa.gz')
    fn_lang_ensembl = unzip_tidy(fn_lang_ensembl, '')
    with open(f'{fn_lang_ensembl}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_lang_ensembl), handle, "fasta")

    # Fusarium pseudograminearum Ensembl
    fn_psuedo_ensembl = wget.download(
        'ftp://ftp.ensemblgenomes.org/pub/release-47/fungi/fasta/fusarium_pseudograminearum/pep/Fusarium_pseudograminearum.GCA_000303195.1.pep.all.fa.gz',
        f'{base}/ensembl/fusarium_pseudogram.pep.fa.gz')
    fn_psuedo_ensembl = unzip_tidy(fn_psuedo_ensembl, '')
    with open(f'{fn_psuedo_ensembl}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_psuedo_ensembl), handle, "fasta")

    # Fusarium pseudograminearum UniProt
    fn_pesudo_uniprot = wget.download(
        'https://www.uniprot.org/uniprot/?query=proteome:UP000007978&format=fasta&force=true',
        f'{base}/uniprot/fusarium_pseudogram_uniprot.fa')
    with open(fn_pesudo_uniprot, "r+") as handle:
        SeqIO.write(yield_records(fn_pesudo_uniprot, '|'), handle, "fasta")

    # Fusarium venenatum Ensembl
    fn_venea_ensembl = wget.download(
        'ftp://ftp.ensemblgenomes.org/pub/release-47/fungi/fasta/fungi_ascomycota4_collection/fusarium_venenatum_gca_900007375/pep/Fusarium_venenatum_gca_900007375.ASM90000737v1.pep.all.fa.gz',
        f'{base}/ensembl/fusarim_venenatum.fa.gz')
    fn_venea_ensembl = unzip_tidy(fn_venea_ensembl, '')
    with open(f'{fn_venea_ensembl}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_venea_ensembl), handle, "fasta")

    # Fusarium venenatum UniProt
    fn_venea_uniprot = wget.download(
        'https://www.uniprot.org/uniprot/?query=taxonomy:56646&format=fasta&force=true',
        f'{base}/uniprot/fusarium_evenen_uniprot.fa')
    with open(fn_venea_uniprot, "r+") as handle:
        SeqIO.write(yield_records(fn_venea_uniprot, '|'), handle, "fasta")

    # Magna Oryzae Ensembl
    fn_magna_oryzae_ensembl = wget.download(
        'ftp://ftp.ensemblgenomes.org/pub/release-47/fungi/fasta/magnaporthe_oryzae/pep/Magnaporthe_oryzae.MG8.pep.all.fa.gz',
        f'{base}/ensembl/magna_oryzae_ensembl.pep.all.fa.gz')
    fn_magna_oryzae_ensembl = unzip_tidy(fn_magna_oryzae_ensembl, '')
    with open(f'{fn_magna_oryzae_ensembl}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_magna_oryzae_ensembl), handle,
                    "fasta")

    # Magna Oryzae UniProt
    fn_magna_oryzae_uniprot = wget.download(
        'https://www.uniprot.org/uniprot/?query=magnaporthe%20oryzae&format=fasta&force=true&sort=score&fil=reviewed:yes',
        f'{base}/uniprot/magna_oryzae_uniprot.fa')
    with open(fn_magna_oryzae_uniprot, "r+") as handle:
        SeqIO.write(yield_records(fn_magna_oryzae_uniprot, '|'), handle,
                    "fasta")

    # Ncrassa UniProt
    fn_ncrassa_uniprot = wget.download(
        'https://www.uniprot.org/uniprot/?query=neurospora%20crassa&format=fasta&force=true&sort=score&fil=reviewed:yes',
        f'{base}/uniprot/ncrassa_uniprot.fa')
    with open(fn_ncrassa_uniprot, "r+") as handle:
        SeqIO.write(yield_records(fn_ncrassa_uniprot, '|'), handle, "fasta")

    # Secrev UniProt
    fn_s_cerevisiae_uniprot = wget.download(
        'https://www.uniprot.org/uniprot/?query=saccharomyces%20cerevisiae&format=fasta&force=true&sort=score&fil=reviewed:yes',
        f'{base}/uniprot/s_cerevisiae_uniprot.fa')
    with open(fn_s_cerevisiae_uniprot, "r+") as handle:
        SeqIO.write(yield_records(fn_s_cerevisiae_uniprot, '|'), handle,
                    "fasta")

    # Secrev YGD
    fn_s_cerevisiae_YGD = wget.download(
        'http://sgd-archive.yeastgenome.org/sequence/S288C_reference/orf_protein/orf_trans.fasta.gz',
        f'{base}/YGD/s_cerevisiae_YGD.fa.gz')
    fn_s_cerevisiae_YGD = unzip_tidy(fn_s_cerevisiae_YGD, '')
    ygd_id, hgnc = [], []
    with open(fn_s_cerevisiae_YGD, "r+") as handle:
        for v in handle:
            if ">" in v:
                ygd_id.append(v.split(" ")[0].replace(">", ""))
                hgnc.append(v.split(" ")[1])
    ygd_mapping_df = pd.DataFrame({'YGD ID': ygd_id, 'HGNC': hgnc})
    ygd_mapping_df.to_csv(f'{base}/mapping/ygd_hgnc_mapping.txt',
                          sep="\t",
                          index=None)
    with open(f'{fn_s_cerevisiae_YGD}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_s_cerevisiae_YGD), handle,
                    "fasta")

    # Zymo Ensembl
    fn_z_trici = wget.download(
        'ftp://ftp.ensemblgenomes.org/pub/release-47/fungi/fasta/zymoseptoria_tritici/pep/Zymoseptoria_tritici.MG2.pep.all.fa.gz',
        f'{base}/ensembl/zymoseptoria_tritici.fa.gz')
    fn_z_trici = unzip_tidy(fn_z_trici, '')
    with open(f'{fn_z_trici}', "r+") as handle:
        SeqIO.write(yield_ensembl_records(fn_z_trici), handle, "fasta")

    # Zymo UniProt
    fn_z_trici_uniprot = wget.download(
        'https://www.uniprot.org/uniprot/?query=zymoseptoria&format=fasta&force=true&sort=score&fil=organism:%22Zymoseptoria%20tritici%20ST99CH_1A5%20[1276529]%22',
        f'{base}/uniprot/zymoseptoria_tritici_uniprot.fa')
    with open(fn_z_trici_uniprot, "r+") as handle:
        SeqIO.write(yield_records(fn_z_trici_uniprot, '|'), handle, "fasta")
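The function calls unzip_tidy() after almost every download to decompress the .fa.gz archives, but the helper itself is not shown in this excerpt. A sketch of what it plausibly does, based only on how it is used above (an assumption: the second argument is an optional output directory, and the decompressed path is returned):

import gzip
import os
import shutil

def unzip_tidy(gz_path, out_dir=''):
    """Decompress a .gz download, remove the archive, and return the new path."""
    fname = os.path.basename(gz_path)[:-3]  # strip the .gz suffix (assumes a .gz input)
    out_path = os.path.join(out_dir or os.path.dirname(gz_path), fname)
    with gzip.open(gz_path, 'rb') as f_in, open(out_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
    os.remove(gz_path)  # tidy away the compressed original
    return out_path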