def downloadAndCheckFeedVersion(self, currentVersion, logger):
        MISSING_VALUE = (False, None, None)
        title = currentVersion['f']['t']
        logger.info(f'looking at {title}')

        # no download url, return
        if 'url' not in currentVersion:
            logger.error('no download url, skipping')
            return MISSING_VALUE

        url = currentVersion['url']
        logger.info(url)
        errors = currentVersion['err']

        if len(errors) > 0:
            logger.error(
                f'Feed {title} @ {url} had errors according to transitfeeds, not processing'
            )
            logger.error('\n'.join([f'   {e}' for e in errors]))
            return MISSING_VALUE

        feedId = currentVersion['id']
        logger.info(feedId)
        feedDestDir = os.path.join(FEED_DIR, feedId)
        logger.info(f'--> downloading to {feedDestDir}')
        pathlib.Path(feedDestDir).mkdir(parents=True, exist_ok=True)

        DOWNLOAD_FILENAME = 'gtfs.zip'
        # download the file
        fileDest = os.path.join(feedDestDir, DOWNLOAD_FILENAME)
        expectedSize = currentVersion['size']

        shouldDownload = True

        if os.path.exists(fileDest):
            logger.info(f'{fileDest} already exists')
            currentSize = os.stat(fileDest).st_size
            if currentSize == expectedSize:
                logger.info(f'{fileDest} is the right size, not redownloading')
                shouldDownload = False
            else:
                logger.info(
                    f'{fileDest} is the wrong size exp: {expectedSize} vs on-disk: {currentSize} - redownloading'
                )

        if shouldDownload:
            try:
                downloadFile(url, fileDest)
                logger.info(f'downloaded {fileDest}')
            except Exception:
                traceback.print_exc()
                print(colored(f'could not download {url}', 'red'))
                return MISSING_VALUE

        # verify feed
        if not ValidateGtfs.validate(fileDest, logger):
            return MISSING_VALUE

        return (True, fileDest, feedId)
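All of the examples on this page call a project-local downloadFile helper whose body is not shown, and whose signature varies from project to project (this first example passes a full destination path; most of the later ones pass a folder and an optional filename). A minimal sketch of such a helper, assuming a streaming download with requests that returns the saved filename:

import os
import requests

def downloadFile(url, dest='.', filename=None):
    # Hypothetical sketch -- the real utils.downloadFile differs per project.
    # Derive the filename from the URL path unless one is given.
    if filename is None:
        filename = url.rstrip('/').split('/')[-1] or 'download'
    os.makedirs(dest, exist_ok=True)
    target = os.path.join(dest, filename)
    # Stream the response to disk so large files never sit fully in memory.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(target, 'wb') as fh:
            for chunk in response.iter_content(chunk_size=8192):
                fh.write(chunk)
    return filename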
Example #2
def get_table(name, format='fits', folder='.'):

    cmd = 'java -jar casjobs.jar extract -table {} -force -type {} -url {}'
    cmd = cmd.format(name, format, folder)
    output = sp.check_output(cmd, shell=True)

    for line in output.decode().split('\n'):
        if line.startswith('http'):
            utils.downloadFile(line,
                               folder,
                               filename='{}.{}'.format(name, format))
 def loadEntity(self, uri):
     """
     Download the data for an entity to the current working
     directory. TODO: add synapse cache support to ensure we
     don't overwrite files.
     """
     entity = self.getEntity(self.repoEndpoint, uri)
     locations = self.getEntity(self.repoEndpoint, entity['locations'])
     if len(locations['results']) == 0:
         raise Exception("entity has no locations")
     location = locations['results'][0]
     url = location['path']
     parseResult = urlparse.urlparse(url)
     filename = parseResult.path.split('/')[-1]
     utils.downloadFile(url, filename)
     return filename
Example #5
def getPPS(obsid):
    """
    Get PPS sources list for OBS_ID obsid.
    """
    url = 'http://nxsa.esac.esa.int/nxsa-sl/servlet/data-action-aio?'
    url += 'obsno={}&name=OBSMLI&level=PPS&extension=FTZ'.format(obsid)
    utils.downloadFile(url, '/tmp', 'temp.tar')

    try:
        utils.untarFile(os.path.join('/tmp', 'temp.tar'), '/tmp')
        xmm_srclist = glob.glob('/tmp/{0}/pps/P{0}EP*.FTZ'.format(obsid))
    except Exception:
        # If untar fails, there is just one downloaded file,
        # and it is probably the EPIC source list.
        xmm_srclist = ['/tmp/temp.tar']

    return xmm_srclist[0]
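getPPS() also depends on utils.untarFile, which is not shown. Assuming it simply unpacks a tar archive into a directory, a sketch using the standard tarfile module (the path-safety check is an added precaution, not necessarily in the original):

import os
import tarfile

def untarFile(tar_path, dest):
    # Hypothetical sketch: unpack tar_path into dest. tarfile.open raises
    # ReadError on non-tar input, which getPPS() above catches to fall back
    # to treating the download as a single file.
    with tarfile.open(tar_path) as tar:
        for member in tar.getmembers():
            # Refuse members that would extract outside dest.
            target = os.path.realpath(os.path.join(dest, member.name))
            if not target.startswith(os.path.realpath(dest) + os.sep):
                raise ValueError('unsafe path in archive: {}'.format(member.name))
        tar.extractall(dest)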
Example #6
 def post(self, type):
     res = 0
     if type == 'category':
         name = self.request.get('name')
         slug = self.request.get('slug')
         if db.addCategory(name, slug):
             res = 1
         self.redirect('/admin?res=%d' % res)
     elif type == 'author':
         img_url = self.request.get('img_url')
         if img_url:
             img_width, img_height = utils.getImageDimensions(utils.downloadFile(img_url))
         else:
             img_width = None
             img_height = None
         if db.addAuthor(name=self.request.get('name'),
                         slug=self.request.get('slug'),
                         description=self.request.get('description'),
                         date_birth=self.request.get('date_birth'),
                         date_death=self.request.get('date_death'),
                         img_url=img_url,
                         img_width=img_width,
                         img_height=img_height):
             res = 1
         self.redirect('/admin?res=%d' % res)
     elif type == 'quote':
         img_url = self.request.get('img_url')
         if img_url:
             img_width, img_height = utils.getImageDimensions(utils.downloadFile(img_url))
         else:
             img_width = None
             img_height = None
         if db.addQuote(author=db.getAuthor(self.request.get('author')),
                        categories=self.request.get_all('category'),
                        name=self.request.get('name'),
                        description=self.request.get('description'),
                        text=self.request.get('text'),
                        html=self.request.get('html'),
                        img_url=img_url,
                        img_width=img_width,
                        img_height=img_height,
                        quote_id=self.request.get('quote_id')):
             res = 1
         self.redirect('/admin?res=%d' % res)
Exemple #7
0
    def checkAndDownloadMostRecentJar(prefix, path):
        url = f'{prefix}{path}/maven-metadata.xml'
        document = requests.get(url).content.decode('utf-8')
        tree = ET.fromstring(document)
        latestVersion = tree.find('./versioning/release').text

        artifactId = tree.find('./artifactId').text

        expectedJar = f'{artifactId}-{latestVersion}.jar'
        if not os.path.exists(expectedJar):
            downloadUrl = f'{prefix}{path}/{latestVersion}/{expectedJar}'
            print(f'downloading {expectedJar} @ {downloadUrl}')
            downloadFile(downloadUrl, expectedJar)
            print(f'downloaded {expectedJar}')
        else:
            print(f'already had {expectedJar}')

        return expectedJar
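For reference, a call to the helper above might look like the following; the repository prefix and artifact path here are purely illustrative:

# Illustrative coordinates only -- substitute a real group/artifact path.
jar = checkAndDownloadMostRecentJar('https://repo1.maven.org/maven2/',
                                    'com/example/my-artifact')
print('latest jar:', jar)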
    def downloadEntity(self, entity):
        """Download an entity and files associated with an entity to local cache
        TODO: Add storing of files in cache
        TODO: Add unpacking of files.
        
        Arguments:
        - `entity`: A synapse ID of an entity (or a dictionary describing an entity)
        Returns:
        - A dictionary representing an entity
        """
        entity = self.getEntity(entity)
        if 'locations' not in entity:
            return entity
        location = entity['locations'][0]  #TODO verify that this doesn't fail for unattached files
        url = location['path']
        parseResult = urlparse.urlparse(url)
        pathComponents = parseResult.path.split('/')

        filename = os.path.join(self.cacheDir, entity['id'], pathComponents[-1])
        if os.path.exists(filename):
            # print(filename, "cached")
            md5 = utils.computeMd5ForFile(filename)
            if md5.hexdigest() != entity.get('md5', ''):
                print(filename, "changed, redownloading")
                utils.downloadFile(url, filename)
        else:
            print(filename, 'downloading...', end=' ')
            utils.downloadFile(url, filename)

        if entity['contentType'] == 'application/zip':
            ## Unpack file
            filepath = os.path.join(os.path.dirname(filename),
                                    os.path.basename(filename) + '_unpacked')
            # TODO: FIX THIS TO BE PATH SAFE! DON'T ALLOW ARBITRARY UNZIPPING
            z = zipfile.ZipFile(filename, 'r')
            z.extractall(filepath)  # WARNING: NOT SAFE
            entity['cacheDir'] = filepath
            entity['files'] = z.namelist()
        else:
            entity['cacheDir'] = os.path.dirname(filename)
            entity['files'] = [os.path.basename(filename)]
        return entity
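Since downloadEntity() above calls .hexdigest() on the value returned by utils.computeMd5ForFile, the helper presumably returns the hash object itself. A sketch under that assumption:

import hashlib

def computeMd5ForFile(filename, block_size=2 ** 20):
    # Hypothetical sketch: hash the file in blocks so large files don't
    # need to fit in memory; return the hash object, not the digest,
    # to match the md5.hexdigest() call in downloadEntity() above.
    md5 = hashlib.md5()
    with open(filename, 'rb') as fh:
        for block in iter(lambda: fh.read(block_size), b''):
            md5.update(block)
    return md5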
    def test_downloadFile(self):
        "test download file function in utils.py"
        url = "http://dev-versions.synapse.sagebase.org/sage_bionetworks_logo_274x128.png"
        result = utils.downloadFile(url)
        if result:
            # print("status: \"%s\"" % str(result[1].status))
            # print("filename: \"%s\"" % str(result[0]))
            filename = result[0]
            assert os.path.exists(filename)

            ## cleanup
            try:
                os.remove(filename)
            except OSError:
                print("warning: couldn't delete file: \"%s\"\n" % filename)
        else:
            print("failed to download file: \"%s\"" % url)
            assert False
Example #10
def download(root, gui: "App" = None):
    error_list = []
    total_count = 0
    skipped_count = 0
    processed_count = 0

    attachments_path = os.path.join(root, "Resources", "attachments")

    html_file_count = 0
    for path, subdirs, files in os.walk(root):
        for name in files:
            if name.endswith(".html"):
                html_file_count += 1

    current_file_idx = 0
    for path, subdirs, files in os.walk(root):
        for name in files:
            if name.endswith(".html"):
                current_file_idx += 1
                fpath = os.path.join(path, name)
                print("Loading:", fpath)

                setGuiFileDownloaderInfo(gui,
                                         week="Searching",
                                         topic="All html files",
                                         filename="",
                                         url="",
                                         output="",
                                         eta="",
                                         speed="",
                                         dl_size="",
                                         file_size="",
                                         progress=0,
                                         current_no=0,
                                         total_files=0)

                with open(fpath, "r", encoding='utf-8') as f:
                    html_text = f.read()
                soup = BeautifulSoup(html_text, 'html.parser')
                # print(soup.get_text)
                attachment_tags = soup.find_all('a',
                                                {"class": "cml-asset-link"})
                asset_containers = soup.find_all('div',
                                                 {"class": "asset-container"})

                print(
                    len(attachment_tags) + len(asset_containers),
                    "attachment(s) found")
                file_modified = False

                current_file_total_count = len(attachment_tags) + len(
                    asset_containers)
                total_count += current_file_total_count

                new_attachment_href = "../../Resources"

                if fpath.find("{}Resources{}".format(os.path.sep,
                                                     os.path.sep)) >= 0:
                    new_attachment_href = "../Resources"

                if len(asset_containers) == 0:
                    # Update GUI Progress
                    dl_size = "{} of {}".format(0, 0)
                    setGuiFileDownloaderInfo(gui,
                                             week="Loading",
                                             topic="",
                                             filename=name,
                                             url="",
                                             output=fpath,
                                             dl_size=dl_size,
                                             file_size="",
                                             progress=100,
                                             current_no=current_file_idx,
                                             total_files=html_file_count)

                for idx, asset_container in enumerate(asset_containers):
                    attachment_tag = asset_container.find('a')
                    attach_filename = asset_container.find(
                        "span", {
                            "class": "asset-name"
                        }).text
                    attach_filename = utils.getFormattedFileName(
                        attach_filename)
                    # print(link.get("href"))
                    attach_href = attachment_tag.get('href')

                    # Update GUI Progress
                    progress = (idx + 1) / current_file_total_count * 100
                    dl_size = "{} of {}".format(idx + 1,
                                                current_file_total_count)
                    setGuiFileDownloaderInfo(gui,
                                             week="Loading",
                                             topic="",
                                             filename=name,
                                             url=attach_href,
                                             output=fpath,
                                             dl_size=dl_size,
                                             file_size="",
                                             progress=progress,
                                             current_no=current_file_idx,
                                             total_files=html_file_count)

                    print("Attachment {}/{}:".format(idx + 1,
                                                     len(asset_containers)),
                          end=" ")
                    if attach_href.find(new_attachment_href) >= 0:
                        print("Already processed. Skipping...")
                        skipped_count += 1
                        continue
                    elif attach_href == "":
                        error_list.append({
                            "error": "blank href",
                            "path": fpath
                        })
                        print("Error: Blank href")
                        continue
                    try:
                        attach_filename = utils.downloadFile(
                            attach_href, attachments_path, attach_filename)
                        file_modified = True
                        processed_count += 1
                        attachment_tag[
                            'href'] = new_attachment_href + "/attachments/" + attach_filename
                    except Exception as e:
                        print("Error:", e)
                        error_list.append({
                            "error": "url",
                            "url": attach_href,
                            "path": fpath
                        })
                        continue

                if len(attachment_tags) == 0:
                    # Update GUI Progress
                    dl_size = "{} of {}".format(0, 0)
                    setGuiFileDownloaderInfo(gui,
                                             week="Loading",
                                             topic="",
                                             filename=name,
                                             url="",
                                             output=fpath,
                                             dl_size=dl_size,
                                             file_size="",
                                             progress=100,
                                             current_no=current_file_idx,
                                             total_files=html_file_count)

                for idx, attachment_tag in enumerate(attachment_tags):
                    attach_href = attachment_tag.get('href')
                    attach_filename = attachment_tag.text
                    attach_filename = utils.getFormattedFileName(
                        attach_filename)

                    # Update GUI Progress
                    progress = (len(asset_containers) + idx +
                                1) / current_file_total_count * 100
                    dl_size = "{} of {}".format(
                        len(asset_containers) + idx + 1,
                        current_file_total_count)
                    setGuiFileDownloaderInfo(gui,
                                             week="Loading",
                                             topic="",
                                             filename=name,
                                             url=attach_href,
                                             output=fpath,
                                             dl_size=dl_size,
                                             file_size="",
                                             progress=progress,
                                             current_no=current_file_idx,
                                             total_files=html_file_count)

                    print("Attachment {}/{}:".format(idx + 1,
                                                     len(attachment_tags)),
                          end=" ")
                    if attach_href.find(new_attachment_href) >= 0:
                        print("Already processed. Skipping...")
                        skipped_count += 1
                        continue
                    elif attach_href == "":
                        error_list.append({
                            "error": "blank href",
                            "path": fpath
                        })
                        print("Error: Blank href")
                        continue
                    try:
                        attach_filename = utils.downloadFile(
                            attach_href, attachments_path, attach_filename)
                        file_modified = True
                        processed_count += 1
                        attachment_tag[
                            'href'] = new_attachment_href + "/attachments/" + attach_filename
                    except Exception as e:
                        print("Error:", e)
                        error_list.append({
                            "error": "url",
                            "url": attach_href,
                            "path": fpath
                        })
                        continue

                if file_modified:
                    utils.savePlainFile(fpath, str(soup))
                print()

    print("Total:", total_count, "attachment(s)")
    print("Processed:", processed_count, "attachment(s)")
    print("Skipped:", skipped_count, "attachment(s)")
    print("Errors:", len(error_list))
    print(error_list)

    # setGuiFileDownloaderInfo(gui, week="Success", topic="Attachment processing finished successfully!")

    with open("data/attach_errors.json", "w") as out_file:
        json.dump(error_list, out_file)
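Both attachment loops above normalize names with utils.getFormattedFileName before downloading. Assuming it simply sanitizes a display name into a safe filename, a sketch might be:

import re

def getFormattedFileName(name):
    # Hypothetical sketch: drop characters that are illegal in filenames
    # on common platforms and collapse runs of whitespace.
    name = re.sub(r'[\\/:*?"<>|]', '', name)
    return re.sub(r'\s+', ' ', name).strip()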
Example #11
def make_cat(opt_survey,
             nir_survey,
             radius=15 * u.arcmin,
             poscorr=False,
             make_mocs=False,
             getXdata=False,
             getOPTdata=False,
             getWSdata=False,
             getNIRdata=False,
             define_bins=False,
             make_bins=False,
             make_xmatch=False):

    cat_url = 'http://xmmssc.irap.omp.eu/Catalogue/3XMM-DR8'
    obs_filename = '3xmmdr8_obslist.fits'
    det_filename = '3XMM_DR8cat_v1.0.fits'
    src_filename = '3XMM_DR8cat_slim_v1.0.fits'

    # Increase in radius to avoid border effects in the crossmatch:
    delta_radius = 0.3 * u.arcmin  # (18 arcsec, like ARCHES)

    # Define structure of data directories
    dirs = dir_structure(opt_survey, nir_survey)

    # Get moc for the optical survey footprint
    if opt_survey == 'pstarrs':
        opt_label = 'PS'
        opt_moc = None

    elif opt_survey == 'sdss':
        opt_label = 'SDSS'
        opt_moc = utils.get_moc(('http://alasky.unistra.fr/footprints/tables/'
                                 'vizier/V_139_sdss9/MOC?nside=2048'),
                                opt_survey, dirs['opt'])
    else:
        raise ValueError('Unknown optical survey!')

    # Get moc for the nir survey footprint
    if nir_survey == '2MASS':
        nir_label = 'NTM'
        url_moc = None
        errtype_nir = 'ellipse'

    elif nir_survey == 'UKIDSS':
        nir_label = 'NUK'
        url_moc = 'http://horus.roe.ac.uk/vsa/coverage-maps/UKIDSS/DR10/'
        errtype_nir = 'rcd_dec_ellipse'

    elif nir_survey == 'VISTA':
        nir_label = 'NVT'
        url_moc = 'http://horus.roe.ac.uk/vsa/coverage-maps/VISTA/VHS/'
        errtype_nir = 'circle'

    else:
        raise ValueError('Unknown near-infrared survey!')

    nir_moc = utils.get_moc(url_moc, nir_survey, dirs['nir'])

    ### Get the list of XMM-Newton observations in the XMM catalogue
    xmmobsids_file_org = os.path.join(dirs['xmm'], obs_filename)

    if not os.path.isfile(xmmobsids_file_org):
        utils.downloadFile(os.path.join(cat_url, obs_filename), dirs['xmm'])

    ### Select valid obsids (clean observations and in the optical footprint)
    try:
        xmmobs = Table.read(xmmobsids_file_org)

    except Exception:
        message = 'Unable to open XMM OBSIDs table!!!\nFile: {}'
        logging.error(message.format(xmmobsids_file_org))
        return

    xmmobs_clean_opt = clean_obsids(xmmobs, radius, opt_survey, opt_moc)

    ### Define non-overlapping mocs for the obsids
    if make_mocs:
        xmmmocs.make_mocs(xmmobs_clean_opt,
                          dirs['xmm'],
                          moc_order=15,
                          radius=radius + delta_radius,
                          remove_stars=True,
                          remove_large_galaxies=True)

    ### Get data for the cross-match
    ## X-rays
    # Check if the XMM sources catalogue exists, and download otherwise
    xmmcat_file = os.path.join(dirs['xmm'], src_filename)

    if not os.path.isfile(xmmcat_file):
        src_filename_gz = '{}.gz'.format(src_filename)
        utils.downloadFile(os.path.join(cat_url, src_filename_gz), dirs['xmm'])
        utils.gunzipFile(os.path.join(dirs['xmm'], src_filename_gz),
                         xmmcat_file)

    # Correct astrometry of XMM sources
    if poscorr:
        # Check if the detections catalogue exists and download otherwise
        xmmdet_file = os.path.join(dirs['xmm'], det_filename)

        if not os.path.isfile(xmmdet_file):
            det_filename_gz = '{}.gz'.format(det_filename)
            utils.downloadFile(os.path.join(cat_url, det_filename_gz),
                               dirs['xmm'])
            utils.gunzipFile(os.path.join(dirs['xmm'], det_filename_gz),
                             xmmdet_file)

        xposcorr.run(xmmdet_file, xmmcat_file, xmmobs_clean_opt, dirs['xmm'])

    # Make files with X-ray sources per non-overlapping field
    file_name, file_ext = os.path.splitext(xmmobsids_file_org)
    xmmobsids_file = '{}_{}_clean_{}{}'.format(file_name, nir_survey.lower(),
                                               opt_survey, file_ext)

    if getXdata:
        xmmobs_xdata = getdata.xmm(xmmobs_clean_opt,
                                   dirs['xmm'],
                                   xmmcat_file,
                                   nir_moc=nir_moc,
                                   opt_moc=opt_moc,
                                   moc_order=15,
                                   radius=radius,
                                   use_poscorr=False)

        # Save selected obsids
        # (with Texp and sky area, remove fields with no sources)
        xmmobs_xdata.write(xmmobsids_file, overwrite=True)

    else:
        xmmobs_xdata = Table.read(xmmobsids_file)

    ## Optical
    file_name, file_ext = os.path.splitext(xmmobsids_file)
    xmmobsids_file = '{}_n{}src{}'.format(file_name, opt_label, file_ext)

    if getOPTdata:
        if opt_survey == 'pstarrs':
            xmmobs_optdata = getdata.pstarrs(xmmobs_xdata,
                                             dirs['opt'],
                                             dirs['xmm'],
                                             nir_moc=nir_moc,
                                             radius=radius + delta_radius,
                                             moc_order=15,
                                             overwrite=False)
        elif opt_survey == 'sdss':
            xmmobs_optdata = getdata.sdss(xmmobs_xdata,
                                          dirs['opt'],
                                          dirs['xmm'],
                                          nir_moc=nir_moc,
                                          radius=radius + delta_radius,
                                          moc_order=15,
                                          overwrite=False)
        else:
            raise ValueError('Unknown optical survey!')

        xmmobs_optdata.write(xmmobsids_file, overwrite=True)

    else:
        xmmobs_optdata = Table.read(xmmobsids_file)

    ## All-WISE
    file_name, file_ext = os.path.splitext(xmmobsids_file)
    xmmobsids_file = '{}_nWSsrc{}'.format(file_name, file_ext)

    if getWSdata:
        xmmobs_wsdata = getdata.wise(xmmobs_optdata,
                                     dirs['wise'],
                                     dirs['xmm'],
                                     nir_moc=nir_moc,
                                     opt_moc=opt_moc,
                                     radius=radius + delta_radius,
                                     moc_order=15,
                                     overwrite=False)

        xmmobs_wsdata.write(xmmobsids_file, overwrite=True)

    else:
        xmmobs_wsdata = Table.read(xmmobsids_file)

    ## NIR data
    file_name, file_ext = os.path.splitext(xmmobsids_file)
    xmmobsids_file = '{}_n{}src{}'.format(file_name, nir_label, file_ext)

    if getNIRdata:
        if nir_survey == '2MASS':
            xmmobs_nirdata = getdata.tmass(xmmobs_wsdata,
                                           dirs['nir'],
                                           dirs['xmm'],
                                           moc_order=15,
                                           opt_moc=opt_moc,
                                           radius=radius + delta_radius,
                                           overwrite=False)
        elif nir_survey == 'UKIDSS':
            xmmobs_nirdata = getdata.ukidss(xmmobs_wsdata,
                                            dirs['nir'],
                                            dirs['xmm'],
                                            moc_order=15,
                                            opt_moc=opt_moc,
                                            radius=radius + delta_radius,
                                            overwrite=False)
        elif nir_survey == 'VISTA':
            xmmobs_nirdata = getdata.vista(xmmobs_wsdata,
                                           dirs['nir'],
                                           dirs['xmm'],
                                           moc_order=15,
                                           opt_moc=opt_moc,
                                           radius=radius + delta_radius,
                                           overwrite=False)

        xmmobs_nirdata.write(xmmobsids_file, overwrite=True)

    else:
        xmmobs_nirdata = Table.read(xmmobsids_file)

    ### Calculate bins according to XMM exposure time and galactic latitude
    file_name, file_ext = os.path.splitext(xmmobsids_file_org)
    xmmobsids_file = '{}_{}_bins{}'.format(file_name, nir_survey.lower(),
                                           file_ext)
    if define_bins:
        ## Galactic latitude binning
        xmmobs_optbin = binning.optical(xmmobs_nirdata, dirs['data'],
                                        nir_survey, opt_survey)
        ## XMM exposure binning
        xmmobs_bins = binning.final(xmmobs_optbin, dirs['data'], nir_survey)
        xmmobs_bins.write(xmmobsids_file, overwrite=True)

    else:
        xmmobs_bins = Table.read(xmmobsids_file)

    ### Make bins
    if make_bins:
        binning.makebins(xmmobs_bins,
                         dirs['xmm'],
                         'XMM',
                         nir_survey,
                         errtype='circle')
        binning.makebins(xmmobs_bins,
                         dirs['opt'],
                         opt_survey,
                         nir_survey,
                         errtype='rcd_dec_ellipse')
        binning.makebins(xmmobs_bins,
                         dirs['wise'],
                         'WISE',
                         nir_survey,
                         errtype='ellipse')
        binning.makebins(xmmobs_bins,
                         dirs['nir'],
                         nir_survey,
                         errtype=errtype_nir)

    ### Crossmatching of catalogues
    xmatchcat_filename = '{}_xmatchcat.fits'.format(nir_survey.lower())
    xmatchcat_filename = os.path.join(dirs['xmatch'], xmatchcat_filename)

    if make_xmatch:
        stats_filename = '{}_bins.fits'.format(nir_survey.lower())
        bin_stats = Table.read(os.path.join(dirs['data'], stats_filename))

        crossmatching.run(bin_stats, dirs, opt_survey, opt_label, nir_survey,
                          nir_label)
        crossmatching.merge_bins(bin_stats, dirs, opt_survey, nir_survey)
        xmatch_cat = crossmatching.merge_cats(dirs, opt_survey, opt_label,
                                              nir_survey, nir_label)
        xmatch_cat.write(xmatchcat_filename, overwrite=True)

    else:
        xmatch_cat = Table.read(xmatchcat_filename, memmap=True)

    return xmatch_cat
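make_cat() unpacks the downloaded catalogues with utils.gunzipFile, which is not shown. Assuming it just decompresses a .gz file to a target path, a standard-library sketch:

import gzip
import shutil

def gunzipFile(gz_path, dest_path):
    # Hypothetical sketch: stream-decompress gz_path into dest_path.
    with gzip.open(gz_path, 'rb') as f_in, open(dest_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)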
Example #12
    def downloadImages(self):
        root = self.root
        error_list = []
        total_count = 0
        skipped_count = 0
        processed_count = 0

        img_path = os.path.join(root, "Resources", "html", "img")

        self.setGuiFileDownloaderInfo(week="Searching",
                                      topic="All html files",
                                      filename="",
                                      url="",
                                      output="",
                                      eta="",
                                      speed="",
                                      dl_size="",
                                      file_size="",
                                      progress=0,
                                      current_no=0,
                                      total_files=0)

        html_file_count = 0
        for path, subdirs, files in os.walk(root):
            for name in files:
                if name.endswith(".html"):
                    html_file_count += 1

        current_file_idx = 0
        for path, subdirs, files in os.walk(root):
            for name in files:
                if name.endswith(".html"):
                    current_file_idx += 1

                    fpath = os.path.join(path, name)
                    print("Loading:", fpath)

                    with open(fpath, "r", encoding='utf-8') as f:
                        html_text = f.read()
                    soup = BeautifulSoup(html_text, 'html.parser')
                    # print(soup.get_text)
                    imgTags = soup.find_all('img')
                    print(len(imgTags), "image(s) found")
                    file_modified = False
                    total_count += len(imgTags)

                    new_img_src = "../../Resources"

                    if fpath.find("{}Resources{}".format(
                            os.path.sep, os.path.sep)) >= 0:
                        new_img_src = "../Resources"

                    if len(imgTags) == 0:
                        # Update GUI Progress
                        dl_size = "{} of {}".format(0, len(imgTags))
                        self.setGuiFileDownloaderInfo(
                            week="Loading",
                            topic="",
                            filename=name,
                            url="",
                            output=fpath,
                            dl_size=dl_size,
                            file_size="",
                            progress=100,
                            current_no=current_file_idx,
                            total_files=html_file_count)

                    for idx, img in enumerate(imgTags):
                        imgUrl = img.get('src')

                        # Update GUI Progress
                        progress = (idx + 1) / len(imgTags) * 100
                        dl_size = "{} of {}".format(idx + 1, len(imgTags))
                        self.setGuiFileDownloaderInfo(
                            week="Loading",
                            topic="",
                            filename=name,
                            url=imgUrl,
                            output=fpath,
                            dl_size=dl_size,
                            file_size="",
                            progress=progress,
                            current_no=current_file_idx,
                            total_files=html_file_count)

                        print("Image {}/{}:".format(idx + 1, len(imgTags)),
                              end=" ")
                        if imgUrl.find(new_img_src) >= 0:
                            print("Already processed. Skipping...")
                            skipped_count += 1
                            continue
                        elif imgUrl == "":
                            error_list.append({
                                "error": "blank img src",
                                "path": fpath
                            })
                            print("Error: Blank img src")
                            continue
                        # print(imgUrl)
                        try:
                            imgFilename = utils.downloadFile(imgUrl, img_path)
                            file_modified = True
                            processed_count += 1
                            img['src'] = new_img_src + "/html/img/" + imgFilename
                        except Exception as e:
                            print("Error:", e)
                            error_list.append({
                                "error": "url",
                                "url": imgUrl,
                                "path": fpath
                            })
                            continue

                    if file_modified:
                        utils.savePlainFile(fpath, str(soup))
                    print()

        print("Total:", total_count, "image(s)")
        print("Processed:", processed_count, "image(s)")
        print("Skipped:", skipped_count, "image(s)")
        print("Errors:", len(error_list))
        print(error_list)
Example #13
    def downloadExternalExercise(self):
        root = self.root
        links = self.download_queue_assignment

        if not links:
            print("Empty Links")
            return False

        error_list = []
        total_count = 0
        skipped_count = 0
        processed_count = 0

        self.setGuiFileDownloaderInfo(week="Loading",
                                      topic="External Exercise",
                                      filename="",
                                      url="",
                                      output="",
                                      eta="",
                                      speed="",
                                      dl_size="",
                                      file_size="",
                                      progress=0,
                                      current_no=0,
                                      total_files=0)

        total_links = len(links)

        for link_idx, item in enumerate(links):
            path = item["path"]
            tmp = path.split("\\")
            week = tmp[0]
            topic = tmp[1]
            prefix = week.replace("Week ", "0") + topic[:2]

            base_link = item["url"]
            html = utils.getFile(base_link)
            soup = BeautifulSoup(html, 'html.parser')

            # print(soup.get_text)

            title_tag = soup.find('title')
            link_tags = soup.find_all('link')
            script_tags = soup.find_all('script')
            img_tags = soup.find_all('img')

            title = title_tag.text
            folder_name = prefix + "_" + utils.getFormattedFileName(
                title.lower().replace(" ", "_"))

            resource_path = os.path.join(root, "Resources", 'html',
                                         folder_name)
            media_path = os.path.join(root, "Resources", 'html', "media")

            # index_file_name = utils.getFormattedFileName(title) + ".html"
            index_file_name = item['filename']

            # print(folder_name)

            print(len(link_tags), "links(s) found")
            print(len(script_tags), "script(s) found")
            print(len(img_tags), "image(s) found")

            # print(link_tags)

            link_total_count = len(link_tags) + len(script_tags)
            total_count += link_total_count

            # print(script_tags)

            for idx, link_tag in enumerate(link_tags):
                src = link_tag.get("href")
                url = utils.getFullUrl(base_link, src)
                # print(url)

                # Update GUI Progress
                progress = (idx + 1) / link_total_count * 100
                dl_size = "{} of {}".format(idx + 1, link_total_count)
                self.setGuiFileDownloaderInfo(week=week,
                                              topic=topic,
                                              filename=index_file_name,
                                              url=url,
                                              output=resource_path,
                                              dl_size=dl_size,
                                              file_size="",
                                              progress=progress,
                                              current_no=link_idx + 1,
                                              total_files=total_links)

                print("Link {}/{}:".format(idx + 1, len(link_tags)), end=" ")
                if src == "":
                    error_list.append({"error": "blank href", "path": path})
                    print("Error: Blank href")
                    continue
                try:
                    link_filename = utils.downloadFile(url, resource_path)
                    processed_count += 1
                    link_tag[
                        'href'] = "../../Resources/html/" + folder_name + "/" + link_filename
                except Exception as e:
                    print("Error:", e)
                    error_list.append({
                        "error": "url",
                        "url": url,
                        "path": path
                    })
                    continue

            for idx, script_tag in enumerate(script_tags):
                progress = (len(link_tags) + idx + 1) / link_total_count * 100
                dl_size = "{} of {}".format(
                    len(link_tags) + idx + 1, link_total_count)

                # Update GUI Progress
                self.setGuiFileDownloaderInfo(week=week,
                                              topic=topic,
                                              filename=index_file_name,
                                              output=resource_path,
                                              dl_size=dl_size,
                                              file_size="",
                                              progress=progress,
                                              current_no=link_idx + 1,
                                              total_files=total_links)

                src = script_tag.get("src")
                if src is None:
                    print(
                        "External src not found. Maybe internal script. Skipping..."
                    )
                    skipped_count += 1
                    continue

                url = utils.getFullUrl(base_link, src)

                # Update GUI Progress
                self.setGuiFileDownloaderInfo(week=week,
                                              topic=topic,
                                              filename=index_file_name,
                                              url=url,
                                              output=resource_path,
                                              dl_size=dl_size,
                                              file_size="",
                                              progress=progress,
                                              current_no=link_idx + 1,
                                              total_files=total_links)

                print("Script {}/{}:".format(idx + 1, len(link_tags)), end=" ")
                if src == "":
                    error_list.append({"error": "blank src", "path": path})
                    print("Error: Blank src")
                    continue
                try:
                    if src.find("main") >= 0:
                        js_file = utils.getFile(url).decode("utf-8")

                        count_static = js_file.count("static")
                        external_links = re.findall(
                            "(static[/a-zA-Z._0-9-@]*)", js_file)
                        external_links_count = len(external_links)

                        print(
                            "Found {} external links in main.js, now downloading"
                            .format(external_links_count))
                        for ext_idx, external_link in enumerate(
                                external_links):
                            external_link_url = urljoin(
                                base_link, external_link)

                            # Update GUI Progress
                            curr_progress = (ext_idx + 1) / len(external_links)
                            prev_progress = (len(link_tags) +
                                             idx) / link_total_count * 100
                            progress = prev_progress + (100 * curr_progress /
                                                        link_total_count)
                            # progress = (len(link_tags) + idx + 1 + ext_idx + 1) / (link_total_count + len(external_links)) * 100
                            # dl_size = "{} of {}".format(len(link_tags) + idx + 1 + ext_idx + 1, link_total_count + len(external_links))
                            dl_size = "{} of {}".format(
                                len(link_tags) + idx + 1, link_total_count)
                            self.setGuiFileDownloaderInfo(
                                week=week,
                                topic=topic,
                                filename=index_file_name,
                                url=external_link_url,
                                output=resource_path,
                                dl_size=dl_size,
                                file_size="",
                                progress=progress,
                                current_no=link_idx + 1,
                                total_files=total_links)

                            print("External Link {}/{}:".format(
                                ext_idx + 1, external_links_count),
                                  end=" ")
                            utils.downloadFile(external_link_url, media_path)

                        if count_static != external_links_count:
                            print(
                                "WARNING: Downloaded {} external links but found {}"
                                .format(external_links_count, count_static))

                        js_file = js_file.replace("static/",
                                                  "../../Resources/html/")
                        js_file_path = os.path.join(root, "Resources", 'html',
                                                    folder_name, "main.js")
                        link_filename = utils.savePlainFile(
                            js_file_path, js_file)
                    else:
                        link_filename = utils.downloadFile(url, resource_path)

                    processed_count += 1
                    script_tag[
                        'src'] = "../../Resources/html/" + folder_name + "/" + link_filename
                except Exception as e:
                    print("Error:", e)
                    error_list.append({
                        "error": "url",
                        "url": url,
                        "path": path
                    })
                    continue

            save_path = os.path.join(root, path, index_file_name)
            utils.savePlainFile(save_path, str(soup))
            print()

        print("Total:", total_count, "file(s)")
        print("Processed:", processed_count, "file(s)")
        print("Skipped:", skipped_count, "file(s)")
        print("Errors:", len(error_list))
        print(error_list)
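downloadExternalExercise() resolves every href/src through utils.getFullUrl. Since the main.js branch already calls urljoin directly, the helper is presumably a thin wrapper around it; a sketch under that assumption:

from urllib.parse import urljoin

def getFullUrl(base_url, src):
    # Hypothetical sketch: resolve a possibly-relative src against the page
    # it came from; absolute URLs pass through unchanged.
    return urljoin(base_url, src)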
Example #14
def main():
    if utils.DATA['version'] != VERSION:
        print('Your version of Launchcraft ({}) does not match the minimum version of Launchcraft ({}). Please update.'.format(VERSION, utils.DATA['version']))
        utils.exit()

    print('This script will ask you yes or no questions.')
    print('Any answers in square brackets (e.g. [1.7.10]), or that are capitalized (e.g. [Y/n]) are the default answers, and will be selected when you press enter.')
    utils.print_separator()

    version = input('Which version of Minecraft would you like to use? [1.7.10]: ').lower()
    if version == '':
        version = '1.7.10'

    if version not in utils.DATA['versions']:
        print("Invalid version selected.")
        utils.exit()

    utils.MODS = utils.DATA['versions'][version]

    JAR_DIR = os.path.join(VERSIONS_DIR, version)

    FORGE_VERSION = '{}-Forge{}'.format(version, utils.MODS['mods']['forge']['version'])
    FORGE_DIR = os.path.join(VERSIONS_DIR, FORGE_VERSION)

    print('Entering directory "{}".'.format(MINECRAFT_DIR))
    try:
        os.chdir(MINECRAFT_DIR)
    except OSError:
        print('Failed to enter minecraft directory, please install minecraft first.')
        utils.exit()
    utils.print_separator()

    # Set the directory to which the custom profile will be installed.
    profile_name = input('What would you like to call the profile being created? [launchcraft]: ').lower()
    if profile_name == '':
        profile_name = 'launchcraft'
    PROFILE_DIR = os.path.join(VERSIONS_DIR, profile_name)
    print('Creating profile {}'.format(profile_name))

    # Delete the old profile directory so we can start from scratch.
    try:
        shutil.rmtree(PROFILE_DIR)
        print('Removed old profile directory.')
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            print('No old profile directory found.')
        else:
            print(ex)
            print('Failed to remove old profile directory, exiting...')
            utils.exit()
    utils.print_separator()

    forge = utils.query_yes_no('Would you like to use Forge?', default='no')
    if forge:
        if os.path.exists(FORGE_DIR):
            print('The required Forge version has been detected on your system.')
            message = 'reinstall'
        else:
            print('The required Forge version has not been detected on your system.')
            message = 'install'
        # Ask the user whether or not they need Forge.
        if utils.query_yes_no('Do you need to {} Forge?'.format(message), default='no'):
            forge = utils.MODS['mods']['forge']
            name = forge['name']
            version = forge['version']
            jarName = 'forge.jar'

            if sys.platform == 'win32' or sys.platform == 'cygwin':
                os.chdir(BASE_DIR)

            # Download the Forge installer.
            print('Downloading {} version {}'.format(name, version))
            utils.downloadFile(forge['url'], jarName)

            if sys.platform == 'win32' or sys.platform == 'cygwin':
                print('You must now run the {} that has been downloaded to your Launchcraft directory.'.format(jarName))
                utils.exit()
            else:
                # Run the installer so the user can install Forge.
                print('You will now be asked to install Forge version {}.'.format(version))
                with open(os.devnull, 'w') as devnull:
                    subprocess.call('java -jar {}'.format(jarName), shell=True, stdout=devnull)

                os.remove(jarName)
    utils.print_separator()

    JAR_FILE = os.path.join(PROFILE_DIR, '{}.jar'.format(profile_name))
    JSON_FILE = os.path.join(PROFILE_DIR, '{}.json'.format(profile_name))

    if forge:
        print('Using Forge {} as the base for the profile'.format(utils.MODS['mods']['forge']['version']))
        if not os.path.exists(MOD_DIR):
            os.makedirs(MOD_DIR)

        utils.INSTALLED_MODS.append('forge')
        JAR_DIR = FORGE_DIR
        print('Creating new profile directory.')
        shutil.copytree(FORGE_DIR, PROFILE_DIR)
        print('Renaming Forge jar.')
        shutil.move(os.path.join(PROFILE_DIR, '{}.jar'.format(FORGE_VERSION)), JAR_FILE)
        SOURCE_JSON_FILE = '{}.json'.format(FORGE_VERSION)

        print('Entering newly created profile directory.')
        os.chdir(PROFILE_DIR)
    else:
        print('Using Minecraft {} as the base for the profile'.format(version))
        # Create the profile directory.
        try:
            print('Creating new profile directory.')
            os.makedirs(PROFILE_DIR)
        except OSError as ex:
            print(ex)
            print('Failed to create new profile directory, exiting...')
            utils.exit()

        print('Entering newly created profile directory.')
        os.chdir(PROFILE_DIR)

        print('Downloading "{0}.jar" and "{0}.json".'.format(version))
        utils.downloadFile('https://s3.amazonaws.com/Minecraft.Download/versions/{0}/{0}.jar'.format(version), '{}.jar'.format(profile_name))
        utils.downloadFile('https://s3.amazonaws.com/Minecraft.Download/versions/{0}/{0}.json'.format(version), '{}.json'.format(version))
        SOURCE_JSON_FILE = '{}.json'.format(version)

    print('Creating "{}.json".'.format(profile_name))
    with open(SOURCE_JSON_FILE, "r") as file:
        data = json.load(file)
    data['id'] = profile_name
    with open(JSON_FILE, "w") as file:
        json.dump(data, file, indent=4)

    print('Deleting "{}".'.format(SOURCE_JSON_FILE))
    os.remove(SOURCE_JSON_FILE)
    utils.print_separator()

    if utils.query_yes_no('Do you want to install mods?', default='no'):
        print('Which mods would you like to install?')
        toInstall = utils.printAskOptions(utils.MODS['mods'])
        print('Installing mods.')
        print('')
        for mod in toInstall:
            modData = utils.MODS['mods'][mod]
            skip = False

            conflicts = [i for i in modData['conflicts'] if i in utils.INSTALLED_MODS]

            if mod == 'forge':
                continue

            # Do not install forge-dependant mods if Forge is not installed.
            if 'forge' in modData['deps'] and 'forge' not in utils.INSTALLED_MODS:
                print('Skipping {} due to missing Forge'.format(modData['name']))
                skip = True
            # Skip conflicting mods
            elif conflicts:
                conflicting_mods = ""
                for i in conflicts:
                    conflicting_mods += utils.MODS['mods'][i]['name'] + ", "
                print('Skipping {} because it conflicts with {}'.format(modData['name'], conflicting_mods[:-2]))
                skip = True

            if skip:
                print('')
                continue

            utils.installDep(mod, JAR_FILE)
            print('')

    utils.removeMETAINF(JAR_FILE)
    utils.print_separator()

    if utils.query_yes_no('Do you want to install texture packs?', default='no'):
        if not os.path.exists(RESOURCEPACK_DIR):
            os.makedirs(RESOURCEPACK_DIR)
        print("What texture packs would you like to install?")
        toInstall = utils.printAskOptions(utils.MODS['resourcepacks'])
        print('Installing resourcepacks.')
        print('')
        for pack in toInstall:
            packData = utils.MODS['resourcepacks'][pack]

            utils.installResourcePack(pack)
            print('')
    utils.print_separator()

    if utils.query_yes_no('Do you want to install shader packs?', default='no'):
        if not os.path.exists(SHADERPACK_DIR):
            os.makedirs(SHADERPACK_DIR)
        print("What shader packs would you like to install?")
        toInstall = utils.printAskOptions(utils.MODS['shaderpacks'])
        print('Installing shaderpacks.')
        print('')
        for pack in toInstall:
            packData = utils.MODS['shaderpacks'][pack]

            utils.installShaderPack(pack)
            print('')
    utils.print_separator()

    print('Completed successfully!')

    try:
        input('Press any key to exit...')
    except (EOFError, KeyboardInterrupt):
        pass

    utils.exit()
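main() also leans on utils.query_yes_no for its prompts. A sketch along the lines of the classic recipe, assuming it returns True/False and accepts a default answer:

def query_yes_no(question, default='yes'):
    # Hypothetical sketch of utils.query_yes_no: prompt until the user
    # answers yes/no (or presses enter to accept the default).
    valid = {'yes': True, 'y': True, 'no': False, 'n': False}
    prompt = ' [Y/n] ' if default == 'yes' else ' [y/N] '
    while True:
        choice = input(question + prompt).strip().lower()
        if choice == '' and default is not None:
            return valid[default]
        if choice in valid:
            return valid[choice]
        print("Please respond with 'yes' or 'no' (or 'y' or 'n').")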