Esempio n. 1
0
def build_complete_model(connection_file):
  # distribute
  nmc.build_net_round_robin(getmodel(), connection_file)
  import distribute
  import multisplit_distrib
  multisplit_distrib.multisplit_distrib(distribute.getmodel())

  # set initial weights
  if len(params.initial_weights) > 0:
    weightsave.weight_load(params.initial_weights)

  # print sections
  nc = h.List("NetCon")
  nc = int(pc.allreduce(nc.count(),1))
  if rank == 0: print "NetCon count = ", nc
  nseg = 0
  for sec in h.allsec():
    nseg += sec.nseg
  nseg = int(pc.allreduce(nseg, 1))
  if rank == 0: print "Total # compartments = ", nseg

  util.show_progress(200)

  #from odorstim import OdorSequence
  #odseq = OdorSequence(params.odor_sequence)

  if rank == 0: print 'total setup time ', h.startsw()-startsw
Esempio n. 2
0
    def start(self, start_state):
        """Run the MCTS loop from `start_state` and pick the root's best action.

        Each iteration works on a fresh clone of `start_state`: the tree
        policy selects a node, the default policy produces a reward for it,
        and that reward is backpropagated. Iterations continue while
        `within_budget` allows it and `overflow_flg` has not been raised.

        Args:
            start_state: game state to search from; its `next_player`,
                `act_num` and `clone()` are used here.

        Returns:
            A `(root, act_index)` tuple: the root node of the search tree and
            the value returned by `best_child(root, 0)`.
        """
        self.ME = start_state.next_player
        self.overflow_flg = False
        root = Node(start_state.act_num)
        began_at = time.time()
        iterations = 0

        while self.within_budget(began_at, iterations) and not self.overflow_flg:
            if self.show_progress:
                util.show_progress(iterations, self.limit)
            # Never mutate the caller's state: every playout gets its own copy.
            playout_state = start_state.clone()
            leaf = self.tree_policy(root, playout_state)
            reward = self.default_policy(leaf, playout_state)
            self.backpropagation(leaf, reward)
            iterations += 1

        if self.show_progress:
            util.fin_progress()
        return root, self.best_child(root, 0)
def build_complete_model(connection_file):
    # distribute
    nmc.build_net_round_robin(getmodel(), connection_file)
    import distribute
    import multisplit_distrib
    multisplit_distrib.multisplit_distrib(distribute.getmodel())

    # set initial weights
    if len(params.initial_weights) > 0:
        weightsave.weight_load(params.initial_weights)

    # print sections
    nc = h.List("NetCon")
    nc = int(pc.allreduce(nc.count(), 1))
    if rank == 0: print "NetCon count = ", nc
    nseg = 0
    for sec in h.allsec():
        nseg += sec.nseg
    nseg = int(pc.allreduce(nseg, 1))
    if rank == 0: print "Total # compartments = ", nseg

    util.show_progress(200)

    import custom_params
    if custom_params.enableOdorInput:
        from odorstim import OdorSequence
        odseq = OdorSequence(params.odor_sequence)

    if rank == 0: print 'total setup time ', h.startsw() - startsw
def build_part_model(gloms, mitrals, dicfile=''):

    model = getmodel()
    model.clear()

    # gids
    gids = set()
    for glomid in gloms:
        gids.update(range(glomid * params.Nmitral_per_glom, (glomid+1) * params.Nmitral_per_glom) + \
                    range(glomid * params.Nmtufted_per_glom + params.gid_mtufted_begin, (glomid+1) * params.Nmtufted_per_glom + params.gid_mtufted_begin))
    gids.update(mitrals)

    # distribute
    nmc.build_net_round_robin(model, gids, dicfile)

    import distribute
    if False:
        # CoreNEURON does not support multisplit
        import multisplit_distrib
        multisplit_distrib.multisplit_distrib(distribute.getmodel())

    if params.gap_junctions_active:
        gapjunc.init()

    # set initial weights
    if len(params.initial_weights) > 0:
        weightsave.weight_load(params.initial_weights)

    # print sections
    nc = h.List("NetCon")
    nc = int(pc.allreduce(nc.count(), 1))
    if rank == 0: print "NetCon count = ", nc
    nseg = 0
    for sec in h.allsec():
        nseg += sec.nseg
    nseg = int(pc.allreduce(nseg, 1))
    if rank == 0: print "Total # compartments = ", nseg

    pc.spike_record(-1, parrun.spikevec, parrun.idvec)
    util.show_progress(200)

    odseq = [OdorStim(*od) for od in params.odor_sequence]
    model.odseq = odseq

    # record
    for rec in params.sec2rec:
        vr.record(*rec)

    if rank == 0: print 'total setup time ', h.startsw() - startsw
Esempio n. 5
0
def download_lecture(dl_link, output_name, pretty_name, sizeLocal):
    """Download `dl_link` into `output_name`, resuming if bytes already exist.

    Args:
        dl_link: URL to fetch.
        output_name: path of the local file to write (or append to).
        pretty_name: human-readable name used in the printed messages and
            forwarded to `show_progress`.
        sizeLocal: number of bytes already on disk; a falsy value requests a
            full download.

    A resume is attempted with an HTTP ``Range`` header. If the server does
    not honour it (any status other than 206), the whole body is being sent,
    so the local file is overwritten from scratch instead of appended to.
    """
    req = urllib.request.Request(dl_link)
    if sizeLocal:
        # Resuming: ask only for the bytes that are still missing.
        req.add_header('Range', 'bytes=%s-' % sizeLocal)

    # The context manager closes the connection even if writing fails midway.
    with urllib.request.urlopen(req) as f:
        # Appending is only safe when the server really sent partial content;
        # a plain 200 means the full file is coming and must replace ours.
        partial = bool(sizeLocal) and getattr(f, "status", None) == 206
        if partial:
            mode = 'ab'
        else:
            mode = 'wb'
            sizeLocal = 0

        # We do + sizeLocal because if we are doing a partial download, the
        # length is only for what we requested, not the whole thing.
        sizeWeb = int(f.headers["Content-Length"]) + sizeLocal

        if not partial:
            print(f"Downloading {pretty_name} to {output_name}.")
        else:
            print(
                f"Resuming partial download of {pretty_name} ({sizeLocal/1000:0.1f}/{sizeWeb/1000:0.1f})."
            )

        with open(output_name, mode) as output:
            for chunk in show_progress(f, pretty_name, sizeLocal, sizeWeb):
                output.write(chunk)
Esempio n. 6
0
def download_file(url: str,
                  download_dir=".cached/downloads",
                  filename="",
                  connect_timeout=10) -> str:
    """Download `url` into `download_dir` and return the path of the file.

    Args:
        url: address to fetch.
        download_dir: target directory; created if needed.
        filename: name of the local file; defaults to the basename of `url`.
        connect_timeout: timeout, in seconds, passed to `requests.get`.

    Returns:
        The path of the downloaded file.

    Raises:
        Exception: if the server answers with a status code other than 200.
    """
    download_dir = os.path.realpath(download_dir)
    filename = filename or os.path.basename(url)

    start_time = get_now()

    target_file_path = os.path.join(download_dir, filename)

    logger.info(f"开始下载 {url} 到 {target_file_path}")
    response = requests.get(url, stream=True, timeout=connect_timeout)

    # With stream=True the connection stays open until it is released, so
    # close it explicitly on every path, including the error one.
    try:
        if response.status_code != 200:
            raise Exception(f"下载失败,状态码 {response.status_code}")

        make_sure_dir_exists(download_dir)

        # Parse the header before creating the file, so that a malformed
        # value does not leave an empty file behind.
        total_length_optional = response.headers.get("content-length")
        if total_length_optional is None:  # no content length header
            total_length = None
        else:
            total_length = int(total_length_optional)

        with open(target_file_path, "wb") as f:
            dl = 0
            # Always stream in chunks: even without a known total this avoids
            # holding the whole body in memory.
            for data in response.iter_content(chunk_size=4096):
                # filter out keep-alive new lines
                if not data:
                    continue

                f.write(data)

                dl += len(data)
                # Progress can only be reported against a known total.
                if total_length is not None:
                    show_progress(filename, total_length, dl)
    finally:
        response.close()

    end_time = get_now()

    logger.info(color("bold_yellow") + f"下载完成,耗时 {end_time - start_time}")

    return target_file_path
Esempio n. 7
0
def load_features(label, sample_len):
    """Compute and stack the features of the first `sample_len` images of `label`.

    Args:
        label: label whose image list is obtained from `load_cyto_list`; it
            is also formatted with ``%d`` into the progress prefix.
        sample_len: maximum number of images to process.

    Returns:
        An empty array if there are no images, the feature array of the
        single image if there is exactly one, and otherwise the features of
        all images stacked vertically with `np.vstack`.
    """
    img_paths = load_cyto_list(label)[:sample_len]
    prefix = "getting label %d" % label

    # Collect the per-image features and stack them once at the end, instead
    # of re-allocating the accumulated array on every iteration.
    features = []
    for idx, img_path in enumerate(img_paths):
        im16_org = open_cyto_tiff(img_path)
        features.append(np.asarray(get_image_feature(im16_org)))

        show_progress(prefix, idx, sample_len)

    if not features:
        return np.array([])
    if len(features) == 1:
        # A lone feature array is returned as is, not promoted to 2-D.
        return features[0]
    return np.vstack(features)
Esempio n. 8
0
def main(arguments=None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:]  # ignore argv[0], the script name
    (options, args) = parser.parse_args(args=arguments)

    # Adjust the logger level to WARNING, INFO or DEBUG, depending on the
    # given number of -v options (none, one or two or more, respectively)
    logging_level = logging.WARNING
    if options.verbose == 1:
        logging_level = logging.INFO
    elif options.verbose >= 2:
        logging_level = logging.DEBUG
    logging.basicConfig(format=style.LOG_FORMAT, level=logging_level)

    # Print the help and abort the execution if there are not two positional
    # arguments left after parsing the options, as the user must specify at
    # least one (only one?) input FITS file and the output directory
    if len(args) < 2:
        parser.print_help()
        return 2  # 2 is generally used for command line syntax errors
    else:
        input_paths = args[:-1]
        output_dir = args[-1]

    # No index can be within the search area if the radius is not > 0
    if options.radius <= 0:
        msg = "%sError: --radius must a positive number of degrees"
        print msg % style.prefix
        sys.exit(style.error_exit_message)

    # Make sure that the output directory exists; create it if it doesn't.
    util.determine_output_dir(output_dir)

    print "%sUsing a local build of Astrometry.net." % style.prefix
    msg = "%sDoing astrometry on the %d paths given as input."
    print msg % (style.prefix, len(input_paths))

    pool = multiprocessing.Pool(options.ncores)
    map_async_args = ((path, output_dir, options) for path in input_paths)
    result = pool.map_async(parallel_astrometry, map_async_args)

    while not result.ready():
        time.sleep(1)
        util.show_progress(queue.qsize() / len(input_paths) * 100)
        # Do not update the progress bar when debugging; instead, print it
        # on a new line each time. This prevents the next logging message,
        # if any, from being printed on the same line that the bar.
        if logging_level < logging.WARNING:
            print

    result.get()  # reraise exceptions of the remote call, if any
    util.show_progress(100)  # in case the queue was ready too soon
    print

    # Results in the process shared queue were only necessary to accurately
    # update the progress bar. They are no longer needed, so empty it now.
    queue.clear()

    print "%sYou're done ^_^" % style.prefix
    return 0
Esempio n. 9
0
def main(arguments=None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:]  # ignore argv[0], the script name
    (options, args) = parser.parse_args(args=arguments)

    # Print the help and abort the execution if there are fewer than three
    # positional arguments left, as the user must specify at least two FITS
    # images and the output mosaic into which they are assembled.
    if len(args) < 3:
        parser.print_help()
        return 2  # used for command line syntax errors
    else:
        assert len(args) >= 3
        input_paths = set(args[:-1])
        output_path = args[-1]

    # Refuse to overwrite the output FITS file unless explicitly instructed to
    # do so. Note that, if the --overwritten option is given, we do not need to
    # delete the existing file: it will be silently overwritten when the output
    # of montage.mosaic() is shutil.move()'d to the output path.

    if os.path.exists(output_path):
        if not options.overwrite:
            msg = "%sError. The output file '%s' already exists."
            print msg % (style.prefix, output_path)
            print style.error_exit_message
            return 1

    # Workaround for a bug in montage.mosaic() that raises an error ('mpirun
    # has exited due to process rank [...] without calling "finalize"...') if
    # mpi = True and background_match = True. Until this is fixed, we can only
    # use one core if the --background-match option is given by the user.

    if options.background_match and options.ncores > 1:
        options.ncores = 1
        for msg in (
                "{0}Warning: --background-match is incompatible with --cores > 1.",
                "{0}Setting the --cores option to a value of one.",
                "{0}This is a workaround for a known bug in montage-wrapper:",
                "{0}https://github.com/astropy/montage-wrapper/issues/18"):
            print msg.format(style.prefix)
        print

    # Map each filter to a list of FITSImage objects
    files = fitsimage.InputFITSFiles()

    msg = "%sMaking sure the %d input paths are FITS images..."
    print msg % (style.prefix, len(input_paths))

    util.show_progress(0.0)
    for index, path in enumerate(input_paths):
        # fitsimage.FITSImage.__init__() raises fitsimage.NonStandardFITS if
        # one of the paths is not a standard-conforming FITS file.
        try:
            img = fitsimage.FITSImage(path)

            # If we do not need to know the photometric filter (because the
            # --filter was not given) do not read it from the FITS header.
            # Instead, use None. This means that 'files', a dictionary, will
            # only have a key, None, mapping to all the input FITS images.

            if options.filter:
                pfilter = img.pfilter(options.filterk)
            else:
                pfilter = None

            files[pfilter].append(img)

        except fitsimage.NonStandardFITS:
            print
            msg = "'%s' is not a standard FITS file"
            raise fitsimage.NonStandardFITS(msg % path)

        percentage = (index + 1) / len(input_paths) * 100
        util.show_progress(percentage)
    print  # progress bar doesn't include newline

    # The --filter option allows the user to specify which FITS files, among
    # all those received as input, must be combined: only those images taken
    # in the options.filter photometric filter.
    if options.filter:

        msg = "%s%d different photometric filters were detected:"
        print msg % (style.prefix, len(files.keys()))

        for pfilter, images in sorted(files.iteritems()):
            msg = "%s %s: %d files (%.2f %%)"
            percentage = len(images) / len(files) * 100
            print msg % (style.prefix, pfilter, len(images), percentage)

        msg = "%sIgnoring images not taken in the '%s' photometric filter..."
        print msg % (style.prefix, options.filter),
        sys.stdout.flush()

        discarded = 0
        for pfilter, images in files.items():
            if pfilter != options.filter:
                discarded += len(images)
                del files[pfilter]

        if not files:
            print
            msg = "%sError. No image was taken in the '%s' filter."
            print msg % (style.prefix, options.filter)
            print style.error_exit_message
            return 1

        else:
            print 'done.'
            msg = "%s%d images taken in the '%s' filter, %d were discarded."
            print msg % (style.prefix, len(files), options.filter, discarded)

    # montage.mosaic() silently ignores those FITS images that have no WCS
    # information in their headers, and also raises a rather cryptic exception
    # (mMakeHdr: Invalid table file) if none of them has been astrometrically
    # solved. Instead of ignoring some images without warning or showing a
    # confusing error message that makes it almost impossible to understand
    # what may be failing, use FITSImage.center_wcs() to make sure that all the
    # images have WCS information, raising NoWCSInformationError otherwise.

    for img in files:
        # May raise NoWCSInformationError
        img.center_wcs()

    # montage.mosaic() requires as first argument the directory containing the
    # input FITS images but, in order to maintain the same syntax across all
    # LEMON commands, we receive them as command-line arguments. Thus, create a
    # temporary directory and symlink from it the input images. Hard links are
    # not an option because os.link() will raise "OSError: [Errno 18] Invalid
    # cross-device link" if the temporary directory is created in a different
    # partition.

    pid = os.getpid()
    suffix = "_LEMON_%d_mosaic" % pid
    kwargs = dict(suffix=suffix + '_input')
    input_dir = tempfile.mkdtemp(**kwargs)
    atexit.register(util.clean_tmp_files, input_dir)

    for img in files:
        path = img.path
        source = os.path.abspath(path)
        basename = os.path.basename(path)
        link_name = os.path.join(input_dir, basename)
        os.symlink(source, link_name)

    # The output of montage.mosaic() is another directory, to which several
    # files are written, so we need the path to a second temporary directory.
    # Delete it before calling mosaic(), as otherwise it will raise IOError
    # ("Output directory already exists").

    kwargs = dict(suffix=suffix + '_output')
    output_dir = tempfile.mkdtemp(**kwargs)
    atexit.register(util.clean_tmp_files, output_dir)
    os.rmdir(output_dir)

    kwargs = dict(
        background_match=options.background_match,
        combine=options.combine,
        bitpix=-64,
    )

    if options.ncores > 1:
        kwargs['mpi'] = True  # use MPI whenever possible
        kwargs['n_proc'] = options.ncores  # number of MPI processes
    montage.mosaic(input_dir, output_dir, **kwargs)

    # montage.mosaic() writes several files to the output directory, but we are
    # only interested in one of them: 'mosaic.fits', the mosaic FITS image.

    MOSAIC_OUTPUT = 'mosaic.fits'
    src = os.path.join(output_dir, MOSAIC_OUTPUT)

    if options.reproject:
        print "%sReproject mosaic to point North..." % style.prefix,
        sys.stdout.flush()
        kwargs = dict(north_aligned=True, silent_cleanup=True)
        montage.reproject(src, output_path, **kwargs)
        print 'done.'
    else:
        # No reprojection, move mosaic to the output path
        shutil.move(src, output_path)

    print "%sYou're done ^_^" % style.prefix
    return 0
Esempio n. 10
0
def main(arguments=None):
    """main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:]  # ignore argv[0], the script name
    (options, args) = parser.parse_args(args=arguments)

    # Adjust the logger level to WARNING, INFO or DEBUG, depending on the
    # given number of -v options (none, one or two or more, respectively)
    logging_level = logging.WARNING
    if options.verbose == 1:
        logging_level = logging.INFO
    elif options.verbose >= 2:
        logging_level = logging.DEBUG
    logging.basicConfig(format=style.LOG_FORMAT, level=logging_level)

    # Print the help and abort the execution if there are not two positional
    # arguments left after parsing the options, as the user must specify at
    # least one (only one?) input FITS file and the output JSON file.
    if len(args) < 2:
        parser.print_help()
        return 2  # 2 is generally used for command line syntax errors
    else:
        sources_img_path = args[0]
        input_paths = list(set(args[1:-1]))
        output_json_path = args[-1]

    # The execution of this module, especially when doing long-term monitoring
    # of reasonably crowded fields, may easily take several *days*. The least
    # we can do, in order to spare the end-user from insufferable grief because
    # of the waste of billions of valuable CPU cycles, is to avoid to have the
    # output file accidentally overwritten.

    if os.path.exists(output_json_path):
        if not options.overwrite:
            msg = "%sError. The output file '%s' already exists."
            print msg % (style.prefix, output_json_path)
            print style.error_exit_message
            return 1

    msg = "%sExamining the headers of the %s FITS files given as input..."
    print msg % (style.prefix, len(input_paths))

    files = fitsimage.InputFITSFiles()
    for index, img_path in enumerate(input_paths):
        img = fitsimage.FITSImage(img_path)
        pfilter = img.pfilter(options.filterk)
        files[pfilter].append(img)

        percentage = (index + 1) / len(input_paths) * 100
        util.show_progress(percentage)

    print  # progress bar doesn't include newline
    print style.prefix

    # To begin with, we need to identify the most constant stars, something for
    # which we have to do photometry on all the stars and for all the images of
    # the campaign. But fret not, as this has to be done only this time: once
    # we get the light curves of all the stars and for all the images, we will
    # be able to determine which are the most constant among them and work
    # always with this subset in order to determine which aperture and sky
    # annulus are the optimal.

    msg = "%sDoing initial photometry with FWHM-derived apertures..."
    print msg % style.prefix
    print style.prefix

    # mkstemp() returns a tuple containing an OS-level handle to an open file
    # and its absolute pathname. Thus, we need to close the file right after
    # creating it, and tell the photometry module to overwrite (-w) it.

    kwargs = dict(prefix="photometry_", suffix=".LEMONdB")
    phot_db_handle, phot_db_path = tempfile.mkstemp(**kwargs)
    atexit.register(util.clean_tmp_files, phot_db_path)
    os.close(phot_db_handle)

    basic_args = [sources_img_path] + input_paths + [phot_db_path, "--overwrite"]

    phot_args = [
        "--maximum",
        options.maximum,
        "--margin",
        options.margin,
        "--cores",
        options.ncores,
        "--min-sky",
        options.min,
        "--objectk",
        options.objectk,
        "--filterk",
        options.filterk,
        "--datek",
        options.datek,
        "--timek",
        options.timek,
        "--expk",
        options.exptimek,
        "--coaddk",
        options.coaddk,
        "--gaink",
        options.gaink,
        "--fwhmk",
        options.fwhmk,
        "--airmk",
        options.airmassk,
    ]

    # The --gain and --uik options default to None, so add them to the list of
    # arguments only if they were given. Otherwise, (a) --gaink would be given
    # a value of 'None', a string, that would result in an error when optparse
    # attempted to convert it to float, and (b) --uik would understood 'None'
    # as the name of the keyword storing the path to the uncalibrated image.

    if options.gain:
        phot_args += ["--gain", options.gain]

    if options.uncimgk:
        phot_args += ["--uncimgk", options.uncimgk]

    # Pass as many '-v' options as we have received here
    [phot_args.append("-v") for x in xrange(options.verbose)]

    extra_args = [
        "--aperture",
        options.aperture,
        "--annulus",
        options.annulus,
        "--dannulus",
        options.dannulus,
    ]

    # Non-zero return codes raise subprocess.CalledProcessError
    args = basic_args + phot_args + extra_args
    check_run(photometry.main, [str(a) for a in args])

    # Now we need to compute the light curves and find those that are most
    # constant. This, of course, has to be done for each filter, as a star
    # identified as constant in Johnson I may be too faint in Johnson B, for
    # example. In other words: we need to calculate the light curve of each
    # star and for each filter, and then determine which are the
    # options.nconstant stars with the lowest standard deviation.

    print style.prefix
    msg = "%sGenerating light curves for initial photometry."
    print msg % style.prefix
    print style.prefix

    kwargs = dict(prefix="diffphot_", suffix=".LEMONdB")
    diffphot_db_handle, diffphot_db_path = tempfile.mkstemp(**kwargs)
    atexit.register(util.clean_tmp_files, diffphot_db_path)
    os.close(diffphot_db_handle)

    diff_args = [
        phot_db_path,
        "--output",
        diffphot_db_path,
        "--overwrite",
        "--cores",
        options.ncores,
        "--minimum-images",
        options.min_images,
        "--stars",
        options.nconstant,
        "--minimum-stars",
        options.min_cstars,
        "--pct",
        options.pct,
        "--weights-threshold",
        options.wminimum,
        "--max-iters",
        options.max_iters,
        "--worst-fraction",
        options.worst_fraction,
    ]

    [diff_args.append("-v") for x in xrange(options.verbose)]

    check_run(diffphot.main, [str(a) for a in diff_args])
    print style.prefix

    # Map each photometric filter to the path of the temporary file where the
    # right ascension and declination of each constant star, one per line, will
    # be saved. This file is from now on passed, along with the --coordinates
    # option, to photometry.main(), so that photometry is not done on all the
    # astronomical objects, but instead exclusively on these ones.

    coordinates_files = {}

    miner = mining.LEMONdBMiner(diffphot_db_path)
    for pfilter in miner.pfilters:

        # LEMONdBMiner.sort_by_curve() returns a list of two-element tuples,
        # mapping the ID of each star to the standard deviation of its light
        # curve in this photometric filter. The list is sorted in increasing
        # order by the standard deviation. We are only interested in the first
        # 'options.nconstant', needing at least 'options.pminimum'.

        msg = "%sIdentifying the %d most constant stars for the %s filter..."
        args = style.prefix, options.nconstant, pfilter
        print msg % args,
        sys.stdout.flush()

        kwargs = dict(minimum=options.min_images)
        stars_stdevs = miner.sort_by_curve_stdev(pfilter, **kwargs)
        cstars = stars_stdevs[: options.nconstant]

        if len(cstars) < options.pminimum:
            msg = (
                "fewer than %d stars identified as constant in the "
                "initial photometry for the %s filter"
            )
            args = options.pminimum, pfilter
            raise NotEnoughConstantStars(msg % args)
        else:
            print "done."

        if len(cstars) < options.nconstant:
            msg = "%sBut only %d stars were available. Using them all, anyway."
            print msg % (style.prefix, len(cstars))

        # Replacing whitespaces with underscores is easier than having to quote
        # the path to the --coordinates file if the name of the filter contains
        # them (otherwise, optparse would only see up to the first whitespace).
        prefix = "%s_" % str(pfilter).replace(" ", "_")
        kwargs = dict(prefix=prefix, suffix=".coordinates")
        coords_fd, coordinates_files[pfilter] = tempfile.mkstemp(**kwargs)
        atexit.register(util.clean_tmp_files, coordinates_files[pfilter])

        # LEMONdBMiner.get_star() returns a five-element tuple with the x and y
        # coordinates, right ascension, declination and instrumental magnitude
        # of the astronomical object in the sources image.
        for star_id, _ in cstars:
            ra, dec = miner.get_star(star_id)[2:4]
            os.write(coords_fd, "%.10f\t%.10f\n" % (ra, dec))
        os.close(coords_fd)

        msg = "%sStar coordinates for %s temporarily saved to %s"
        print msg % (style.prefix, pfilter, coordinates_files[pfilter])

    # The constant astronomical objects, the only ones to which we will pay
    # attention from now on, have been identified. So far, so good. Now we
    # generate the light curves of these objects for each candidate set of
    # photometric parameters. We store the evaluated values in a dictionary in
    # which each filter maps to a list of json_parse.CandidateAnnuli objects.

    evaluated_annuli = collections.defaultdict(list)

    for pfilter, coords_path in coordinates_files.iteritems():

        print style.prefix
        msg = "%sFinding the optimal photometric parameters for the %s filter."
        print msg % (style.prefix, pfilter)

        if len(files[pfilter]) < options.min_images:
            msg = "fewer than %d images (--minimum-images option) for %s"
            args = options.min_images, pfilter
            raise NotEnoughConstantStars(msg % args)

        # The median FWHM of the images is needed in order to calculate the
        # range of apertures that we need to evaluate for this filter.

        msg = "%sCalculating the median FWHM for this filter..."
        print msg % style.prefix,

        pfilter_fwhms = []
        for img in files[pfilter]:
            img_fwhm = photometry.get_fwhm(img, options)
            logging.debug("%s: FWHM = %.3f" % (img.path, img_fwhm))
            pfilter_fwhms.append(img_fwhm)

        fwhm = numpy.median(pfilter_fwhms)
        print " done."

        # FWHM to range of pixels conversion
        min_aperture = fwhm * options.lower
        max_aperture = fwhm * options.upper
        annulus = fwhm * options.sky
        dannulus = fwhm * options.width

        # The dimensions of the sky annulus remain fixed, while the
        # aperture is in the range [lower * FWHM, upper FWHM], with
        # increments of options.step pixels.
        filter_apertures = numpy.arange(min_aperture, max_aperture, options.step)
        assert filter_apertures[0] == min_aperture

        msg = "%sFWHM (%s passband) = %.3f pixels, therefore:"
        print msg % (style.prefix, pfilter, fwhm)
        msg = "%sAperture radius, minimum = %.3f x %.2f = %.3f pixels "
        print msg % (style.prefix, fwhm, options.lower, min_aperture)
        msg = "%sAperture radius, maximum = %.3f x %.2f = %.3f pixels "
        print msg % (style.prefix, fwhm, options.upper, max_aperture)
        msg = "%sAperture radius, step = %.2f pixels, which means that:"
        print msg % (style.prefix, options.step)

        msg = "%sAperture radius, actual maximum = %.3f + %d x %.2f = %.3f pixels"
        args = (
            style.prefix,
            min_aperture,
            len(filter_apertures),
            options.step,
            max(filter_apertures),
        )
        print msg % args

        msg = "%sSky annulus, inner radius = %.3f x %.2f = %.3f pixels"
        print msg % (style.prefix, fwhm, options.sky, annulus)
        msg = "%sSky annulus, width = %.3f x %.2f = %.3f pixels"
        print msg % (style.prefix, fwhm, options.width, dannulus)

        msg = "%s%d different apertures in the range [%.2f, %.2f] to be evaluated:"
        args = (
            style.prefix,
            len(filter_apertures),
            filter_apertures[0],
            filter_apertures[-1],
        )
        print msg % args

        # For each candidate aperture, and only with the images taken in
        # this filter, do photometry on the constant stars and compute the
        # median of the standard deviation of their light curves as a means
        # of evaluating the suitability of this combination of parameters.
        for index, aperture in enumerate(filter_apertures):

            print style.prefix

            kwargs = dict(prefix="photometry_", suffix=".LEMONdB")
            fd, aper_phot_db_path = tempfile.mkstemp(**kwargs)
            atexit.register(util.clean_tmp_files, aper_phot_db_path)
            os.close(fd)

            paths = [img.path for img in files[pfilter]]
            basic_args = [sources_img_path] + paths + [aper_phot_db_path, "--overwrite"]

            extra_args = [
                "--filter",
                str(pfilter),
                "--coordinates",
                coords_path,
                "--aperture-pix",
                aperture,
                "--annulus-pix",
                annulus,
                "--dannulus-pix",
                dannulus,
            ]

            args = basic_args + phot_args + extra_args
            check_run(photometry.main, [str(a) for a in args])

            kwargs = dict(prefix="diffphot_", suffix=".LEMONdB")
            fd, aper_diff_db_path = tempfile.mkstemp(**kwargs)
            atexit.register(util.clean_tmp_files, aper_diff_db_path)
            os.close(fd)

            # Reuse the arguments used earlier for diffphot.main(). We only
            # need to change the first argument (path to the input LEMONdB)
            # and the third one (path to the output LEMONdB)
            diff_args[0] = aper_phot_db_path
            diff_args[2] = aper_diff_db_path
            check_run(diffphot.main, [str(a) for a in diff_args])

            miner = mining.LEMONdBMiner(aper_diff_db_path)

            try:
                kwargs = dict(minimum=options.min_images)
                cstars = miner.sort_by_curve_stdev(pfilter, **kwargs)
            except mining.NoStarsSelectedError:
                # There are no light curves with at least options.min_images points.
                # Therefore, much to our sorrow, we cannot evaluate this aperture.
                msg = "%sNo constant stars for this aperture. Ignoring it..."
                print msg % style.prefix
                continue

            # There must be at most 'nconstant' stars, but there may be fewer
            # if this aperture causes one or more of the constant stars to be
            # too faint (INDEF) in so many images as to prevent their lights
            # curve from being computed.
            assert len(cstars) <= options.nconstant

            if len(cstars) < options.pminimum:
                msg = (
                    "%sJust %d constant stars, fewer than the allowed "
                    "minimum of %d, had their light curves calculated "
                    "for this aperture. Ignoring it..."
                )
                args = style.prefix, len(cstars), options.pminimum
                print style.prefix
                continue

            # 'cstars' contains two-element tuples: (ID, stdev)
            stdevs_median = numpy.median([x[1] for x in cstars])
            params = (aperture, annulus, dannulus, stdevs_median)
            # NumPy floating-point data types are not JSON serializable
            args = (float(x) for x in params)
            candidate = json_parse.CandidateAnnuli(*args)
            evaluated_annuli[pfilter].append(candidate)

            msg = "%sAperture = %.3f, median stdev (%d stars) = %.4f"
            args = style.prefix, aperture, len(cstars), stdevs_median
            print msg % args

            percentage = (index + 1) / len(filter_apertures) * 100
            msg = "%s%s progress: %.2f %%"
            args = style.prefix, pfilter, percentage
            print msg % args

        # Let the user know of the best 'annuli', that is, the one for
        # which the standard deviation of the constant stars is minimal
        kwargs = dict(key=operator.attrgetter("stdev"))
        best_candidate = min(evaluated_annuli[pfilter], **kwargs)

        msg = "%sBest aperture found at %.3f pixels with stdev = %.4f"
        args = style.prefix, best_candidate.aperture, best_candidate.stdev
        print msg % args

    print style.prefix
    msg = "%sSaving the evaluated apertures to the '%s' JSON file ..."
    print msg % (style.prefix, output_json_path),
    json_parse.CandidateAnnuli.dump(evaluated_annuli, output_json_path)
    print " done."

    print "%sYou're done ^_^" % style.prefix
    return 0
Esempio n. 11
0
def main(arguments=None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:]  # ignore argv[0], the script name
    (options, args) = parser.parse_args(args=arguments)

    # Print the help message and abort the execution if there are not two
    # positional arguments left after parsing the options, as the user must
    # specify the path to both the input and output directories.

    if len(args) < 2:
        parser.print_help()
        return 2  # 2 is generally used for command line syntax errors
    else:
        input_dirs = args[:-1]
        output_dir = args[-1]

    # Make sure that all the input directories exist, abort otherwise.
    for path in input_dirs:
        if not os.path.exists(path):
            print "%sThe input directory, '%s', does not exist. Exiting." % \
                  (style.prefix, path)
            return 1

    # The input and output directories must be different, as otherwise some
    # files (especially if the filename of the output files is automatically
    # detected) could be overwritten.
    for path in input_dirs:
        if os.path.abspath(path) == os.path.abspath(output_dir):
            print "%s[INPUT_DIRS] and OUTPUT_DIR must be different. " \
                  "Exiting." % style.prefix
            return 1

    # Make sure that the output directory exists, create it otherwise
    util.determine_output_dir(output_dir)

    # Recursively walk down the input directories, obtaining a list of all the
    # regular files. Then, and while a progress bar is shown to let the user
    # estimate how much longer it is, detect which among them are FITS files.

    print "%sIndexing regular files within directory trees starting at " \
          "INPUT_DIRS..." % style.prefix ,
    files_paths = fitsimage.find_files(input_dirs,
                                       followlinks=options.followlinks,
                                       pattern=options.pattern)
    print 'done.'

    print "%sDetecting FITS images among the %d indexed regular files..." % \
          (style.prefix, len(files_paths))

    images_set = set()
    util.show_progress(0.0)
    for path_index, path in enumerate(files_paths):
        try:
            images_set.add(fitsimage.FITSImage(path))
            fraction = (path_index + 1) / len(files_paths) * 100
            util.show_progress(fraction)
        except fitsimage.NonStandardFITS:
            pass
    else:
        util.show_progress(100)
        print

    if not len(images_set):
        print "%sNo FITS files were found. Exiting." % style.prefix
        return 1
    else:
        print "%s%d FITS files detected." % (style.prefix, len(images_set))

    # All the images must have the same size; otherwise, only those with the
    # most common dimensions will be imported, while the rest will be ignored
    print style.prefix
    print "%sChecking the sizes of the detected images..." % style.prefix,
    img_sizes = collections.defaultdict(int)  # dimensions counter
    for img in images_set:
        img_sizes[img.size] += 1
    print 'done.'

    # The most common size is the only one element in case len(img_sizes) == 1
    x_size, y_size = max(img_sizes.iterkeys(), key=img_sizes.get)[:2]

    if len(img_sizes) == 1:
        print "%sAll the FITS images have the same size: %d x %d pixels" % \
              (style.prefix, x_size, y_size)
    else:

        print "%sMultiple sizes were detected among the FITS images." % style.prefix
        print "%sDiscarding images with a size other than %d x %d pixels, " \
              "the most common..." % (style.prefix, x_size, y_size) ,
        old_size = len(images_set)
        images_set = set(img for img in images_set
                         if img.size == (x_size, y_size))
        print 'done.'

        if not images_set:
            print "%sThere are no FITS files left. Exiting." % style.prefix
            return 1
        else:
            print "%s%d FITS files were discarded because of their size, " \
                  "%s remain." % (style.prefix, old_size - len(images_set),
                                  len(images_set))

    # Those FITS images whose object names do not match any of the given
    # patterns, or which do not even have the keyword which contains the
    # name for the object observed, are discarded.
    print style.prefix
    print "%sImporting only those FITS files whose %s keyword can be found " \
          "and matches" % (style.prefix, options.objectk)
    print "%sone of the following Unix patterns: %s ..." % \
          (style.prefix, options.objectn)

    # We first test that the keyword exists (hence the pass for the KeyError
    # exception, which means that the image is filtered out) and, after that,
    # check whether its value matches one of the regular expressions which
    # define the object names to be imported.
    object_set = set()

    # Keep the track of how many images are ignored for each reason
    saturated_excluded = 0
    non_match_excluded = 0

    for img in images_set:

        try:
            object_name = img.read_keyword(options.objectk)
            for pattern in options.objectn:
                regexp = re.compile(fnmatch.translate(pattern), re.IGNORECASE)
                if regexp.match(object_name):
                    # Even if the object name matchs, the median number of
                    # counts must still be below the threshold, if any. If the
                    # number of ADUs is irrelevant we can avoid having to
                    # unnecessarily compute it.
                    if options.max_counts:
                        with pyfits.open(img.path, readonly=True) as hdu:
                            median_counts = numpy.median(hdu[0].data)
                        if median_counts > options.max_counts:
                            print "%s%s excluded (matched, but saturated " \
                                  "with %d ADUs)" % (style.prefix, img.path,
                                                     median_counts)
                            saturated_excluded += 1
                            break

                    # This point reached if median number of ADUs of image is
                    # above the threshold or irrelevant, so it can be imported.
                    print "%s%s imported (%s matches '%s')" % (
                        style.prefix, img.path, object_name, pattern)

                    object_set.add(img)
                    break

            else:  # only executed if for loop exited cleanly
                print "%s%s excluded (%s does not match anything)" % \
                      (style.prefix, img.path, object_name)
                non_match_excluded += 1
        except KeyError:
            pass

    if not saturated_excluded and not non_match_excluded:
        print "%sNo images were filtered out. Hooray!" % style.prefix
    if saturated_excluded:
        print "%s%d files were discarded because they were saturated " \
              "(> %d ADUs)." % (style.prefix, saturated_excluded,
                                options.max_counts)
    if non_match_excluded:
        print "%s%d files were discarded because of their non-matching " \
              "object names." % (style.prefix, non_match_excluded)

    # Abort the execution if all the FITS files were filtered out
    if not object_set:
        print "%sThere are no FITS files left. Exiting." % style.prefix
        return 1

    # Sort the FITS files by their date of observation, according to the header
    print style.prefix
    print "%sSorting the FITS files by their date of observation " \
          "[keyword: %s]..." % (style.prefix, options.datek) ,

    kwargs = dict(date_keyword=options.datek,
                  time_keyword=options.timek,
                  exp_keyword=options.exptimek)
    get_date = operator.methodcaller('date', **kwargs)
    sorted_imgs = sorted(object_set, key=get_date)

    # Let the user know if one or more images could not be sorted (because of
    # problems when parsing the FITS keywords from which the observation date
    # is derived) and thus discarded.
    difference = len(object_set) - len(sorted_imgs)
    assert difference >= 0
    if difference:
        print
        print "%s%d files were discarded as the observation date keyword " \
              "was not found or the " % (style.prefix, difference)
        print "%sdate in it represented did not conform to the FITS " \
              "standard." % style.prefix

        # Execution is aborted if all the FITS files were filtered out
        if not sorted_imgs:
            print "%sThere are no FITS files left. Exiting." % style.prefix
            return 1
    else:
        print 'done.'

    # If no filename for the output images was specified, attempt to
    # automatically detect the most common basename among the FITS files.
    # This is doing by extracting the leftmost non-numeric substring of
    # all the filenames and taking that which repeats the most.

    if not options.filename:
        print style.prefix
        print "%sDetecting the most common name among input files..." % \
              style.prefix ,
        sys.stdout.flush()

        # Use a dictionary in order to keep the track of how many times we
        # have come across each prefix (leftmost non-numeric substring in
        # the filename) and select that with most occurrences.

        prefixes = collections.defaultdict(int)
        for prefix in (img.prefix for img in sorted_imgs):
            prefixes[prefix] += 1

        # Select the prefix (key) that is repeated the most
        options.filename = max(prefixes, key=prefixes.get)
        print 'done.'

    print "%sImported FITS filenames will start with the string: '%s'" % \
          (style.prefix, options.filename)

    # Now we have to copy the FITS files. The basename of each imported file
    # will be options.filename + its sequence number. Filling zeros will be
    # affixed to each number so that the lenth of all the basenames is the
    # same. Following Dijkstra's teachings, we start numbering at zero.

    assert len(sorted_imgs)
    ndigits = len(str(len(sorted_imgs) - 1))
    print "%s%d digits are needed in order to enumerate %d files." % \
          (style.prefix, ndigits, len(sorted_imgs))

    print style.prefix
    print "%sCopying the FITS files to '%s'..." % \
          (style.prefix, output_dir)

    for index, fits_file in enumerate(sorted_imgs):

        # i.e., 'ferM_' + '0000' + '.fits' = 'ferM_0000.fits'
        dest_name = '%s%0*d.fits' % (options.filename, ndigits, index)
        dest_path = os.path.join(output_dir, dest_name)

        shutil.copy2(fits_file.path, dest_path)

        # The permission bits have been copied, but we need to make sure
        # that the copy of the FITS file is always writable, no matter what
        # the original permissions were. This is equivalent to `chmod u+w`
        util.owner_writable(dest_path, True)

        dest_img = fitsimage.FITSImage(dest_path)

        # Add some information to the FITS header...
        if not options.exact:

            msg1 = "File imported by LEMON on %s" % util.utctime()
            dest_img.add_history(msg1)

            # If the --uik option is given, store in this keyword the absolute
            # path to the image of which we made a copy. This allows other
            # LEMON commands, if necessary, to access the original FITS files
            # in case the imported images are modified (e.g., bias subtraction
            # or flat-fielding) before these other commands are executed.

            if options.uncimgk:

                comment = "before any calibration task"
                dest_img.update_keyword(options.uncimgk,
                                        os.path.abspath(dest_img.path),
                                        comment=comment)

                msg2 = "[Import] Original image: %s"
                dest_img.add_history(msg2 % os.path.abspath(fits_file.path))

        # ... unless we want an exact copy of the images. If that is the case,
        # verify that the SHA-1 checksum of the original and the copy matches
        elif fits_file.sha1sum != dest_img.sha1sum:
            msg = "copy of %s not identical (SHA-1 differs)" % fits_file.path
            raise IOError(msg)

        # Show which file has been copied, using the format of the
        # 'cp -v' command: `./ultra2/ferM_11.fits' -> `imported/img_01.fits'
        print "%s`%s' -> `%s'" % (style.prefix, fits_file.path, dest_path)

    # Finally, let the user know how many FITS images, and the fraction of
    # the total, that were imported, as well as their size in megabytes.
    print style.prefix
    ifraction = len(sorted_imgs) / len(images_set) * 100
    print "%sFITS files detected: %d" % (style.prefix, len(images_set))
    print "%sFITS files successfully imported: %d (%.2f%%)" % \
          (style.prefix, len(sorted_imgs), ifraction)

    total_size = 0.0
    for fits_file in sorted_imgs:
        total_size += os.path.getsize(fits_file.path)  # in bytes

    print "%sTotal size of imported files: %.2f MB" % \
          (style.prefix, total_size / (1024.0 ** 2))
    print "%sYou're done ^_^" % style.prefix
    return 0
Esempio n. 12
0
def _find_media_download_link(driver, lec):
    """Open the download page of `lec` and return the media file URL.

    Loads `lec.link`, reads the href of the "Download media file." link and
    stops the page's download redirect. Retries every half second, without
    limit, until all three steps succeed.
    """
    while True:
        try:
            driver.get(lec.link)
            dl_link = driver.find_element_by_partial_link_text(
                "Download media file.").get_attribute("href")
            # send javascript to stop download redirect
            driver.execute_script('stopCounting=true')
            return dl_link
        except Exception:
            # Not a bare `except:` so that Ctrl-C / SystemExit can still
            # break out of this otherwise endless retry loop.
            time.sleep(0.5)


def download_lectures_for_subject(driver, subject, downloaded, skipped,
                                  current_year, week_day, dates_list,
                                  download_mode, video_folder):
    """Download the lecture recordings of one subject.

    Walks the subject's recordings list in the browser, builds a Lecture for
    each entry, decides which ones need downloading (missing or incomplete
    files inside the date range) and downloads them.

    Args:
        driver: selenium webdriver used to navigate the pages.
        subject: 4-tuple (subject code, subject name, link, sub_num); the
            last element is not used here.
        downloaded: list; every lecture downloaded here is appended to it.
        skipped: list; every lecture not downloaded is appended to it.
        current_year: year appended to the year-less dates shown on the page.
        week_day: mapping indexed by lecture date, giving the week number.
        dates_list: dates to download; its first element is treated as the
            lower bound below which the listing stops being scanned.
        download_mode: "audio" for audio files, anything else for video.
        video_folder: base folder for videos, also passed to
            getSubjectFolder().

    Returns:
        The (downloaded, skipped) pair, or None if no recordings page could
        be found for the subject.
    """
    subjCode, name, link, _ = subject
    print(f"\nNow working on {subjCode}: {name}")

    # Go to subject page and find Lecture Recordings page.
    driver.get(link)

    link_num = 0
    while True:
        try:
            recs_page = search_link_text(driver, LECTURE_TAB_STRINGS, link_num)
            if recs_page is None:
                print("No recordings page found, skipping to next subject")
                return
            recs_page.click()
            time.sleep(.1)

            # The recordings list lives three iframes deep: the second
            # iframe of the page, then the first one at each inner level.
            for frame_index in (1, 0, 0):
                frame = driver.find_elements_by_tag_name('iframe')[frame_index]
                driver.switch_to_frame(frame)
            break

        # Incorrect page opened due to multiple 'Lecture' links. Try next link
        # Untested, because the problem causing this bug mysteriously stopped
        # Could cause main tab to close. This also handles intermediate page
        # errors (because they're viewed as IndexErrors).
        except IndexError:
            link_num += 1

    # find ul element, list of recordings
    while True:
        try:
            recs_ul = driver.find_element_by_css_selector("ul#echoes-list")
            recs_list = recs_ul.find_elements_by_css_selector("li.li-echoes")
            break
        except NoSuchElementException:
            print("Slow connection, waiting for echocenter to load...")
            time.sleep(0.5)

    # setup for recordings
    lectures_list = []
    to_download = []

    # Text of the link to the initial download page, audio or video
    media_link_text = "Audio File" if download_mode == "audio" else "Video File"

    # print status
    print("Building list of lectures...")
    # for each li element, build up filename info and add to download list
    for rec_num, recording in enumerate(recs_list):
        # click on each recording to get different download links
        date_div = recording.find_element_by_css_selector("div.echo-date")

        # Deals with error where the next element can't be selected if it isn't
        # literally visible. Weird behaviour, but the solution is to catch the
        # error and tab downwards.
        scroll_point = 10
        while True:
            scroll_point += 5
            try:
                recording.click()
                break
            except ElementNotVisibleException:
                actions = webdriver.ActionChains(driver)
                actions.move_to_element(recording)
                actions.click()
                actions.send_keys(Keys.SPACE)
                actions.perform()
            except Exception:
                # Any other failure: focus the list and scroll a bit further
                # down before retrying. KeyboardInterrupt is not swallowed.
                driver.execute_script("arguments[0].focus();", recs_ul)
                driver.execute_script(f"window.scrollTo(0, {scroll_point});")

        # convert string into datetime.datetime object
        # date is formatted like "August 02 3:20 PM" but I want "August 02 2016"
        # so I need to get rid of time and add year
        date_string = " ".join(
            date_div.text.split(" ")[:-2]) + f" {current_year}"
        try:
            date = datetime.datetime.strptime(date_string, "%d %B %Y")
        except ValueError:
            # Sometimes the date is presented in different format.
            date = datetime.datetime.strptime(date_string, "%B %d %Y")

        # Checking if we can terminate early.
        if date < dates_list[0]:
            print(
                "The lectures further down are outside the date range, no need to check them."
            )
            break

        # lookup week number and set default lecture number
        week_num = week_day[date]
        lec_num = 1

        # get link to initial download page for either audio or video
        while True:
            try:
                first_link = driver.find_element_by_partial_link_text(
                    media_link_text).get_attribute("href")
                break
            except NoSuchElementException:
                time.sleep(0.5)

        # Another lecture in the same week: add 1 to lec_num of earlier video
        for lecture in lectures_list:
            if lecture.week == week_num:
                lecture.lecOfWeek += 1

        # Create Lecture
        lectures_list.append(
            Lecture(first_link, subjCode, week_num, lec_num, date, name,
                    rec_num))

    # TODO: Get the length of the <ul>...</ul>, use it when creating the
    #       lectures instead
    # Fixing Lecture Nums (lecs are downloaded & created in reverse order)
    num_lectures = len(lectures_list)
    for lec in lectures_list:
        lec.recNum = num_lectures - lec.recNum

    # Use the code from `subject` rather than the loop variable `lec`, which
    # is unbound when no lectures were listed.
    subjectFolder = getSubjectFolder(subjCode, video_folder)

    # assign filenames
    # made it a separate loop because in the loop above it's constantly
    # updating earlier values etc
    for lec in lectures_list:

        # DAVE_PERSONAL_PREFERENCE
        filename = f"{lec.subjName} - L{lec.recNum:02}"

        if download_mode == "audio":
            filename_with_ext = filename + ".mp3"
            # NOTE(review): audio_folder is neither a parameter nor a local;
            # it must exist at module level for audio mode to work. Confirm.
            folder = audio_folder
        else:
            filename_with_ext = filename + ".m4v"
            folder = video_folder

        lecture_dir = os.path.join(folder, subjectFolder, LECTURE_FOLD_NAME)
        file_path = os.path.join(lecture_dir, filename_with_ext)

        if not os.path.isdir(lecture_dir):
            print("Making {} folder for {}".format(LECTURE_FOLD_NAME, folder))
            os.makedirs(lecture_dir)

        lec.fName = filename
        lec.fPath = file_path

    # TODO - This is going into each link even if we don't need the lecture.
    #        This slows the program down massively.
    #        Perhaps filter out those with invalid dates & non-existent files?
    #        A part of this is caused by having to check file sizes every
    #        single time. Get the file sizes once, save into a document, so we
    #        don't have to do this every time.
    # only add lectures to be downloaded if they are inside date range. else,
    # skip them
    for lec in lectures_list:
        in_range = lec.date in dates_list
        on_disk = os.path.isfile(lec.fPath)

        if not in_range:
            if on_disk:
                lec.dl_status = "Outside date range and file already exists"
            else:
                lec.dl_status = "Outside date range"
            skipped.append(lec)
            print(f"Skipping {lec.fName}: {lec.dl_status}")
            continue

        # In date range and not on disk yet: plain download.
        if not on_disk:
            print(f"Will download {lec.fName}")
            to_download.append(
                (lec, False))  # False means not downloaded at all.
            continue

        # In date range and on disk: check that the file is completely
        # downloaded by comparing its size with the one the server reports.
        dl_link = _find_media_download_link(driver, lec)
        try:
            with urllib.request.urlopen(dl_link) as response:
                # This is the size of the file on the server in bytes.
                sizeWeb = int(response.headers["Content-Length"])
        except Exception:
            # Catching the situation where the server doesn't advertise the
            # file length (or cannot be reached): assume nothing is missing.
            sizeWeb = 0

        # Get size of file on disk.
        sizeLocal = os.path.getsize(lec.fPath)

        # Add to download list with note that it was incomplete.
        # TODO Unify the two bits of code to do with downloading / progress.
        if sizeWeb > sizeLocal:
            lec.dl_status = "Incomplete file (%0.1f/%0.1f MiB)." % (
                sizeLocal / 1024 / 1024,
                sizeWeb / 1024 / 1024,
            )
            # Include (sizeLocal, sizeWeb) if partially downloaded.
            to_download.append((lec, (sizeLocal, sizeWeb)))
            print("Resuming " + lec.fName + ": " + lec.dl_status)
        # Otherwise the file must be fully downloaded.
        else:
            lec.dl_status = "File already exists on disk (fully downloaded)."
            skipped.append(lec)
            print("Skipping " + lec.fName + ": " + lec.dl_status)

    # print list of lectures to be downloaded
    if len(to_download) > 0:
        print("Lectures to be downloaded:")
        for lec, partial in to_download:
            # Print with additional note if it's there.
            if lec.dl_status is not None:
                print(lec.fName, "-", lec.dl_status)
            else:
                print(lec.fName)
    else:
        print("No lectures to be downloaded.")

    # for each lecture, find the actual download link and download
    for lec, partial in to_download:

        print("Now working on", lec.fName)
        dl_link = _find_media_download_link(driver, lec)

        # Easy to deal with full download, just use urlretrieve. reporthook gives a progress bar.
        if partial is False:
            print("Downloading to", lec.fPath)
            urllib.request.urlretrieve(dl_link, lec.fPath, reporthook)
        # This handles a partially downloaded file.
        else:
            sizeLocal, sizeWeb = partial
            print("Resuming partial download of %s (%0.1f/%0.1f)." %
                  (lec.fName, sizeLocal / 1000, sizeWeb / 1000))

            # Ask only for the bytes we do not have yet.
            # NOTE(review): the response status is not checked; if the server
            # ignores the Range header the whole file would be appended.
            req = urllib.request.Request(dl_link)
            req.headers['Range'] = 'bytes=%s-' % sizeLocal
            # The ab is the append write mode.
            with urllib.request.urlopen(req) as response, \
                    open(lec.fPath, 'ab') as output:
                for chunk in show_progress(response, sizeLocal, sizeWeb):
                    # Process the chunk
                    output.write(chunk)

        print("Completed! Going to next file!")
        downloaded.append(lec)

    # when finished with subject
    print("Finished downloading files for", subjCode)
    return downloaded, skipped
Esempio n. 13
0
def main(arguments=None):
    """main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:]  # ignore argv[0], the script name
    (options, args) = parser.parse_args(args=arguments)

    # Adjust the logger level to WARNING, INFO or DEBUG, depending on the
    # given number of -v options (none, one or two or more, respectively)
    logging_level = logging.WARNING
    if options.verbose == 1:
        logging_level = logging.INFO
    elif options.verbose >= 2:
        logging_level = logging.DEBUG
    logging.basicConfig(format=style.LOG_FORMAT, level=logging_level)

    if len(args) != 2:
        parser.print_help()
        return 2  # used for command line syntax errors
    else:
        assert len(args) == 2
        input_db_path = args[0]
        output_db_path = args[1]

    if options.min_cstars > options.ncstars:
        print "%sError. The value of --minimum-stars must be <= --stars." % style.prefix
        print style.error_exit_message
        return 1

    if not os.path.exists(input_db_path):
        print "%sError. Database '%s' does not exist." % (style.prefix,
                                                          input_db_path)
        print style.error_exit_message
        return 1

    if os.path.exists(output_db_path):
        if not options.overwrite:
            print "%sError. The output database '%s' already exists." % (
                style.prefix,
                output_db_path,
            )
            print style.error_exit_message
            return 1
        else:
            os.unlink(output_db_path)

    # Note that we do not allow the user to update an existing LEMON database,
    # although it would make sense if we had just done photometry and now we
    # wanted to generate the light curves. However, and to be on the safe side,
    # we prefer to preserve a copy of the original database, as it is not
    # inconceivable that the astronomer may need to recompute the curves more
    # than once, each time with a different set of parameters.

    print "%sMaking a copy of the input database..." % style.prefix,
    sys.stdout.flush()
    shutil.copy2(input_db_path, output_db_path)
    util.owner_writable(output_db_path, True)  # chmod u+w
    print "done."

    with database.LEMONdB(output_db_path) as db:
        nstars = len(db)
        print "%sThere are %d stars in the database" % (style.prefix, nstars)

        for pfilter in sorted(db.pfilters):

            print style.prefix
            print "%sLight curves for the %s filter will now be generated." % (
                style.prefix,
                pfilter,
            )
            print "%sLoading photometric information..." % style.prefix,
            sys.stdout.flush()
            all_stars = [
                db.get_photometry(star_id, pfilter) for star_id in db.star_ids
            ]
            print "done."

            # The generation of each light curve is a task independent from the
            # others, so we can use a pool of workers and do it in parallel.
            pool = multiprocessing.Pool(options.ncores)
            map_async_args = ((star, all_stars, options) for star in all_stars)
            result = pool.map_async(parallel_light_curves, map_async_args)

            util.show_progress(0.0)
            while not result.ready():
                time.sleep(1)
                util.show_progress(queue.qsize() / len(all_stars) * 100)
                # Do not update the progress bar when debugging; instead, print it
                # on a new line each time. This prevents the next logging message,
                # if any, from being printed on the same line that the bar.
                if logging_level < logging.WARNING:
                    print

            result.get()  # reraise exceptions of the remote call, if any
            util.show_progress(100)  # in case the queue was ready too soon
            print

            # The multiprocessing queue contains two-element tuples,
            # mapping the ID of each star to its light curve.
            print "%sStoring the light curves in the database..." % style.prefix
            util.show_progress(0)
            light_curves = (queue.get() for x in xrange(queue.qsize()))
            for index, (star_id, curve) in enumerate(light_curves):

                # NoneType is returned by parallel_light_curves when the light
                # curve could not be calculated -- because it did not meet the
                # minimum number of images or comparison stars.
                if curve is None:
                    logging.debug("Nothing for star %d; light curve could not "
                                  "be generated" % star_id)
                    continue

                logging.debug("Storing light curve for star %d in database" %
                              star_id)
                db.add_light_curve(star_id, curve)
                logging.debug("Light curve for star %d successfully stored" %
                              star_id)

                util.show_progress(100 * (index + 1) / len(all_stars))
                if logging_level < logging.WARNING:
                    print

            else:
                logging.info("Light curves for %s generated" % pfilter)
                logging.debug("Committing database transaction")
                db.commit()
                logging.info("Database transaction commited")

                util.show_progress(100.0)
                print

        print "%sUpdating statistics about tables and indexes..." % style.prefix,
        sys.stdout.flush()
        db.analyze()
        print "done."

        # Update LEMONdB metadata
        db.date = time.time()
        db.author = pwd.getpwuid(os.getuid())[0]
        db.hostname = socket.gethostname()
        db.commit()

    util.owner_writable(output_db_path, False)  # chmod u-w
    print "%sYou're done ^_^" % style.prefix
    return 0
Esempio n. 14
0
def main(arguments=None):
    """ main() function, encapsulated in a method to allow for easy invokation.

    This method follows Guido van Rossum's suggestions on how to write Python
    main() functions in order to make them more flexible. By encapsulating the
    main code of the script in a function and making it take an optional
    argument the script can be called not only from other modules, but also
    from the interactive Python prompt.

    Guido van van Rossum - Python main() functions:
    http://www.artima.com/weblogs/viewpost.jsp?thread=4829

    Keyword arguments:
    arguments - the list of command line arguments passed to the script.

    """

    if arguments is None:
        arguments = sys.argv[1:]  # ignore argv[0], the script name
    (options, args) = parser.parse_args(args=arguments)

    # Adjust the logger level to WARNING, INFO or DEBUG, depending on the
    # given number of -v options (none, one or two or more, respectively)
    logging_level = logging.WARNING
    if options.verbose == 1:
        logging_level = logging.INFO
    elif options.verbose >= 2:
        logging_level = logging.DEBUG
    logging.basicConfig(format=style.LOG_FORMAT, level=logging_level)

    # Print the help and abort the execution if there are not two positional
    # arguments left after parsing the options, as the user must specify at
    # least one (only one?) input FITS file and the output directory
    if len(args) < 2:
        parser.print_help()
        return 2  # 2 is generally used for command line syntax errors
    else:
        input_paths = args[:-1]
        output_dir = args[-1]

    # Make sure that the output directory exists, and create it if it doesn't.
    # The subdirectories for discarded images are not yet created; we put this
    # off until we know that at least one image is indeed going to be excluded.
    util.determine_output_dir(output_dir)
    fwhm_dir = os.path.join(output_dir, options.fwhm_dir)
    elong_dir = os.path.join(output_dir, options.elong_dir)

    print "%s%d paths given as input, on which sources will be detected." % \
          (style.prefix, len(input_paths))
    print "%sRunning SExtractor on all the FITS images..." % style.prefix

    # Use a pool of workers and run SExtractor on the images in parallel!
    pool = multiprocessing.Pool(options.ncores)
    map_async_args = ((path, options) for path in input_paths
                      if os.path.isfile(path))
    result = pool.map_async(parallel_sextractor, map_async_args)

    util.show_progress(0.0)
    while not result.ready():
        time.sleep(1)
        util.show_progress(queue.qsize() / len(input_paths) * 100)
        # Do not update the progress bar when debugging; instead, print it
        # on a new line each time. This prevents the next logging message,
        # if any, from being printed on the same line that the bar.
        if logging_level < logging.WARNING:
            print

    result.get()  # reraise exceptions of the remote call, if any
    util.show_progress(100)  # in case the queue was ready too soon
    print

    # Three sets, to keep the track of all the images on which SExtractor
    # has been run and also of which have been discarded because of their
    # unnaceptable FWHM or elongation ratio.
    all_images = set()
    fwhm_discarded = set()
    elong_discarded = set()

    # Dictionary mapping each input image to the temporary output file: a copy
    # of the input image but whose FITS header has been updated with the path
    # to the SExtractor catalog and the MD5 hash of the configuration files.
    seeing_tmp_paths = dict()

    # Extract the four-element tuples (path to the image, FWHM, elongation and
    # number of sources detected by SExtractor) from the multiprocessing' queue
    # and store the values in three independent dictionaries; these provide
    # fast access, with O(1) lookup, to the data.
    fwhms = {}
    elongs = {}
    nstars = {}

    for _ in xrange(queue.qsize()):
        path, output_tmp_path, fwhm, elong, stars = queue.get()
        all_images.add(path)
        seeing_tmp_paths[path] = output_tmp_path

        # The clean-up function cannot be registered in parallel_sextractor()
        # because it would remove the temporary FITS file when the process
        # terminates (instead of when our program exits, which is what we
        # need). Do it here, to make sure that whatever happens next these
        # temporary files are always deleted.
        atexit.register(util.clean_tmp_files, output_tmp_path)

        fwhms[path] = fwhm
        elongs[path] = elong
        nstars[path] = stars

    if not all_images:
        print "%sError. No FITS images were detected." % style.prefix
        print style.error_exit_message
        return 1

    # Let's first discard those images with a bad full width at half maximum.
    # In order to to this, we fit a normal distribution (assuming the FWHMs to
    # be Gaussian distributed) and define the maximum allowed value as that
    # which exceeds the specified number of standard deviations of the mean.

    print "%sFitting a Gaussian distribution to the FWHMs..." % style.prefix,
    sys.stdout.flush()
    logging.debug("Fitting a Gaussian distribution to the %d FWHMs" %
                  len(fwhms))
    mu, sigma = scipy.stats.norm.fit(fwhms.values())
    logging.debug("FWHMs mean = %.3f" % mu)
    logging.debug("FWHMs sigma = %.3f" % sigma)
    print 'done.'
    sys.stdout.flush()

    print "%sFWHMs mean = %.3f, sigma = %.3f pixels" % (style.prefix, mu,
                                                        sigma)
    maximum_fwhm = mu + (options.fwhm_sigma * sigma)
    logging.debug("Maximum allowed FWHM = %.3f + %.1f x %.3f = %.3f pixels" % \
                 (mu, options.fwhm_sigma, sigma, maximum_fwhm))
    print "%sDiscarding images with a FWHM > %.3f + %.1f x %.3f = %.3f pixels..." % \
          (style.prefix, mu, options.fwhm_sigma, sigma, maximum_fwhm)

    # Exclude images by adding them to the FWHM-discarded set
    for path, fwhm in sorted(fwhms.iteritems()):
        if fwhm > maximum_fwhm:
            fwhm_discarded.add(path)
            logging.debug("%s discarded (FWHM = %.3f > %.3f" % \
                         (path, fwhm, maximum_fwhm))
            print "%s%s discarded (FWHM = %.3f)" % (style.prefix, path, fwhm)

    logging.info("Images discarded by FWHM: %d" % len(fwhm_discarded))
    if not fwhm_discarded:
        print "%sNo images were discarded because of their FWHM. Hooray!" % style.prefix
    else:
        discarded_fraction = len(fwhm_discarded) / len(all_images) * 100
        nleft = len(all_images) - len(fwhm_discarded)  # non-discarded images
        print "%s%d FITS images (%.2f %%) discarded, %d remain" % \
              (style.prefix,  len(fwhm_discarded), discarded_fraction, nleft)

    # Repeat the same approach, now with the elongation ratios. Images already
    # discarded because of their FWHM are not even considered -- why discard
    # them twice? They can simply be ignored.

    print "%sFitting a Gaussian distribution to the elongations..." % style.prefix,
    sys.stdout.flush()
    mu, sigma = scipy.stats.norm.fit(elongs.values())
    logging.debug("Elongations mean = %.3f" % mu)
    logging.debug("Elongations sigma = %.3f" % sigma)
    print 'done.'
    sys.stdout.flush()

    print "%sElongation mean = %.3f, sigma = %.3f pixels" % (style.prefix, mu,
                                                             sigma)
    maximum_elong = mu + (options.elong_sigma * sigma)
    logging.debug("Maximum allowed elongation = %.3f + %.1f x %.3f = %.3f pixels" % \
                 (mu, options.elong_sigma, sigma, maximum_elong))
    print "%sDiscarding images with an elongation > %.3f + %.1f x %.3f = %.3f ..." % \
          (style.prefix, mu, options.elong_sigma, sigma, maximum_elong)

    for path, elong in sorted(elongs.iteritems()):
        # Ignore FWHM-discarded images
        if path in fwhm_discarded:
            logging.debug("%s ignored (already discarded by FWHM)" % path)
            continue
        elif elong > maximum_elong:
            elong_discarded.add(path)
            logging.debug("%s discarded (elongation = %.3f > %.3f" % \
                         (path, fwhm, maximum_elong))
            print "%s%s discarded (elongation = %.3f)" % (style.prefix, path,
                                                          elong)

    logging.info("Images discarded by elongation: %d" % len(elong_discarded))
    if not elong_discarded:
        print "%sNo images were discarded because of their elongation. Yay!" % style.prefix
    else:
        initial_size = len(all_images) - len(fwhm_discarded)
        discarded_fraction = len(elong_discarded) / initial_size * 100
        nleft = initial_size - len(elong_discarded)
        print "%s%d FITS images (%.2f %%) discarded, %d remain" % \
              (style.prefix,  len(elong_discarded), discarded_fraction, nleft)

    # Finally, take the images whose number of stars is at the 'stars_per'
    # percentile and select the one with the best FWHM. This will be our
    # 'best-seeing' image, in which sources may be detected. Taking directly
    # the image with the best FWHM may not work as we need the best-seeomg
    # image to also be one of the most populated.

    print "%sIdentifying the images whose number of detected sources it at " \
          "the %.2f percentile..." % (style.prefix, options.stars_per) ,
    sys.stdout.flush()
    # Ignore discarded images, for whatever reason
    logging.debug("Finding the %.2f percentile of the number of stars " \
                 "detected by SExtractor"  % options.stars_per)
    for path in fwhm_discarded.union(elong_discarded):
        del nstars[path]
        reason = 'FWHM' if path in fwhm_discarded else 'elongation'
        logging.debug("%s ignored (was discarded by %s)" % (path, reason))
    min_nstars = scipy.stats.scoreatpercentile(nstars.values(),
                                               options.stars_per)
    print 'done.'

    print "%sNumber of stars at percentile = %d, taking the images with at " \
          "least this number of sources..." % (style.prefix, min_nstars) ,
    sys.stdout.flush()
    most_populated_images = [
        path for path, stars in nstars.iteritems() if stars >= min_nstars
    ]

    logging.debug("There are %s images with a number of stars at the %.2f " \
                 "percentile" % (len(most_populated_images), options.stars_per))
    logging.debug("Identifying the image with the lowest FWHM")
    print 'done.'

    print "%sFinally, finding the image with the lowest FWHM among these " \
          "%d images..." % (style.prefix, len(most_populated_images)),
    sys.stdout.flush()

    # Find the image with the best seeing (lowest FWHM)
    best_seeing = min(most_populated_images, key=lambda path: fwhms[path])
    logging.debug("Best-seeing image: %s" % path)
    logging.debug("Best-seeing image FWHM = %.3f" % fwhms[best_seeing])
    logging.debug("Best-seeing image elongation = %.3f" % elongs[best_seeing])
    logging.debug("Best-seeing image sources = %d" % nstars[best_seeing])
    assert best_seeing not in fwhm_discarded
    assert best_seeing not in elong_discarded
    print 'done.'

    print "%sBest-seeing image = %s, with %d sources and a FWHM of %.3f pixels" % \
          (style.prefix, best_seeing, nstars[best_seeing], fwhms[best_seeing])

    # The subdirectories are created only if at least one image is going to be
    # discarded. We do not want empty directories in case no image is discarded
    # because of its full-width at half maximum (FWHM) or elongation.

    if fwhm_discarded:
        util.determine_output_dir(fwhm_dir, quiet=True)

    if elong_discarded:
        util.determine_output_dir(elong_dir, quiet=True)

    # Finally, copy all the FITS images to the output directory
    processed = 0
    for path in sorted(all_images):
        # Add the suffix to the basename of the FITS image
        root, ext = os.path.splitext(os.path.basename(path))
        output_filename = root + options.suffix + ext
        logging.debug("Basename '%s' + '%s' becomes '%s'" % \
                     (path, options.suffix, output_filename))

        if path in fwhm_discarded:
            output_path = os.path.join(fwhm_dir, output_filename)
            logging.debug("%s was discarded because of its FWHM" % path)
            logging.debug("%s to be copied to subdirectory %s" %
                          (path, fwhm_dir))
            history_msg1 = "Image discarded by LEMON on %s" % util.utctime()
            history_msg2 = "[Discarded] FWHM = %.3f pixels, maximum allowed value = %.3f" % \
                           (fwhms[path], maximum_fwhm)

        elif path in elong_discarded:
            output_path = os.path.join(elong_dir, output_filename)
            logging.debug("%s was discarded because of its elongation ratio" %
                          path)
            logging.debug("%s to be copied to subdirectory %s" %
                          (path, elong_dir))
            history_msg1 = "Image discarded by LEMON on %s" % util.utctime()
            history_msg2 = "[Discarded] Elongation = %.3f, maximum allowed value = %.3f" % \
                           (elongs[path], maximum_elong)

        elif path == best_seeing:

            # Retain original name if --filename is an empty string
            if not options.bseeingfn:
                filename = output_filename
            else:
                filename = options.bseeingfn

            output_path = os.path.join(output_dir, filename)
            logging.debug("%s is the best-seeing image" % path)
            logging.debug("%s to be copied to directory %s with name %s" % \
                         (path, output_dir, options.bseeingfn))
            history_msg1 = "Image identified by LEMON as the 'best-seeing' one"
            history_msg2 = "FWHM = %.3f | Elongation = %.3f | Sources: %d (at %.2f percentile)" % \
                           (fwhms[path], elongs[path], nstars[path], options.stars_per)

        else:
            output_path = os.path.join(output_dir, output_filename)
            logging.debug("%s to be copied to %s" % (path, output_dir))
            history_msg1 = "Image FWHM = %.3f" % fwhms[path]
            history_msg2 = "Image elongation = %.3f" % elongs[path]

        if os.path.exists(output_path) and not options.overwrite:
            msg = ("%sError. Output FITS file '%s' already exists. "
                   "You need to use --overwrite.")
            args = style.prefix, output_path
            print msg % args
            print style.error_exit_message
            return 1

        else:
            src = seeing_tmp_paths[path]
            shutil.move(src, output_path)

        util.owner_writable(output_path, True)  # chmod u+w
        logging.debug("%s copied to %s" % (path, output_path))
        output_img = fitsimage.FITSImage(output_path)
        output_img.add_history(history_msg1)
        output_img.add_history(history_msg2)
        logging.debug("%s: FITS header updated (HISTORY keywords)" % path)

        # Copy the FWHM to the FITS header, for future reference
        comment = "Margin = %d, SNR percentile = %.3f" % (options.margin,
                                                          options.per)
        output_img.update_keyword(options.fwhmk, fwhms[path], comment=comment)
        logging.debug("%s: FITS header updated (%s keyword)" %
                      (path, options.fwhmk))

        print "%sFITS image %s saved to %s" % (style.prefix, path, output_path)
        processed += 1

    print "%sA total of %d images was saved to directory '%s'." % (
        style.prefix, processed, output_dir)
    print "%sWe're done ^_^" % style.prefix
    return 0