Code Example #1
File: crawler.py Project: LXiong/miner
def download_user_statuses(uid_tasks, download_comments=False, download_pictures=False):
    for uid, path in uid_tasks.iteritems():
        print('Downloading user status of [%s].' % uid)
        status_fpath = '%s/Status/%s.json' % (base_dir, uid)
        if not os.path.exists(status_fpath):
            try:
                statuses = get_all_statuses(uid=uid)
                with codecs.open((path % 'Status') + '.json', 'w',
                                 encoding='utf-8') as f:
                    json.dump(statuses, f, ensure_ascii=False)
            except APIError as e:
                print('%s: %s' % (uid, e))
                continue  # no statuses fetched; skip this user
            except Exception as e:
                print('%s: %s' % (uid, e))
                continue
        elif download_comments or download_pictures:
            with codecs.open(status_fpath, 'r', 'utf-8-sig') as fp_status:
                statuses = json.load(fp_status, encoding='utf-8')

        if download_comments:  # download status comments
            cmt_folder = '%s/Comments/%s/' % (base_dir, uid)
            if os.path.exists(cmt_folder + 'done'): continue

            try:
                os.makedirs(cmt_folder)
            except OSError:
                pass  # folder already exists

            for status in statuses:
                cmts_count = int(status['comments_count'])
                if cmts_count < 1: continue

                mid = str(status['id'])

                cmt_fpath = cmt_folder + '%s.json' % mid
                if os.path.exists(cmt_fpath):
                    continue

                comments = get_commnets_by_status(mid,cmts_count)
                cmt_len = len(comments)
                print('  Download Comment [%s].[%s]->[%d]' % (uid, mid, cmt_len))
                if cmt_len < 1: continue

                with codecs.open(cmt_fpath, 'w', encoding='utf-8') as f:
                    json.dump(comments, f, ensure_ascii=False)

            with codecs.open(cmt_folder + 'done', 'w', encoding='utf-8') as fp:
                fp.write('done')

        if download_pictures:  # download status original pictures
            pic_folder = '%s/Pictures/%s/' % (base_dir, uid)
            if os.path.exists(pic_folder + 'done'):
                continue

            try:
                os.makedirs(pic_folder)
            except OSError:
                pass  # folder already exists

            all_success = True
            for status in statuses:
                #time.sleep(sleep_span)
                pics = status.get('pic_urls')
                if pics is None or len(pics) == 0:
                    continue

                mid = str(status['id'])

                i_pic = 0
                for item in pics:
                    pic = item.pop('thumbnail_pic')
                    pic_url = pic.replace('thumbnail', 'large')
                    # ind points just past the last '/'; slice from there to
                    # keep the full file name.
                    ind = pic_url.rindex('/') + 1
                    alias = '%s_%d_%s' % (mid, i_pic, pic_url[ind:])
                    success = util.store_image(pic_url, pic_folder + alias)
                    all_success &= success
                    i_pic += 1  # advance the per-status picture index

                print('  Download Pictures [%s].[%s]->[%d]' % (uid, mid, len(pics)))

            if all_success:
                with codecs.open(pic_folder + 'done', 'w', encoding='utf-8') as fp:
                    fp.write('done')
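
A minimal usage sketch with hypothetical values: uid_tasks is assumed to map each user id to a path template containing a '%s' slot for the subfolder name, which is what the `path % 'Status'` call above expects.

uid_tasks = {
    # hypothetical uid; the template resolves to base_dir/Status/<uid>.json
    '1234567890': base_dir + '/%s/1234567890',
}
download_user_statuses(uid_tasks,
                       download_comments=True,
                       download_pictures=True)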
Code Example #2
def download_user_statuses(uid_tasks,
                           download_comments=False,
                           download_pictures=False):
    for uid, path in uid_tasks.iteritems():
        print('Downloading user status of [%s].' % uid)
        status_fpath = '%s/Status/%s.json' % (base_dir, uid)
        if not os.path.exists(status_fpath):
            try:
                statuses = get_all_statuses(uid=uid)
                with codecs.open((path % 'Status') + '.json',
                                 'w',
                                 encoding='utf-8') as f:
                    json.dump(statuses, f, ensure_ascii=False)
            except APIError as e:
                print('%s: %s' % (uid, e))
                continue  # no statuses fetched; skip this user
            except Exception as e:
                print('%s: %s' % (uid, e))
                continue
        elif download_comments or download_pictures:
            with codecs.open(status_fpath, 'r', 'utf-8-sig') as fp_status:
                statuses = json.load(fp_status, encoding='utf-8')

        if download_comments:  # download status comments
            cmt_folder = '%s/Comments/%s/' % (base_dir, uid)
            if os.path.exists(cmt_folder + 'done'): continue

            try:
                os.makedirs(cmt_folder)
            except OSError:
                pass  # folder already exists

            for status in statuses:
                cmts_count = int(status['comments_count'])
                if cmts_count < 1: continue

                mid = str(status['id'])

                cmt_fpath = cmt_folder + '%s.json' % mid
                if os.path.exists(cmt_fpath):
                    continue

                comments = get_commnets_by_status(mid, cmts_count)
                cmt_len = len(comments)
                print('  Download Comment [%s].[%s]->[%d]' %
                      (uid, mid, cmt_len))
                if cmt_len < 1: continue

                with codecs.open(cmt_fpath, 'w', encoding='utf-8') as f:
                    json.dump(comments, f, ensure_ascii=False)

            with codecs.open(cmt_folder + 'done', 'w', encoding='utf-8') as fp:
                fp.write('done')

        if download_pictures:  # download status original pictures
            pic_folder = '%s/Pictures/%s/' % (base_dir, uid)
            if os.path.exists(pic_folder + 'done'):
                continue

            try:
                os.makedirs(pic_folder)
            except OSError:
                pass  # folder already exists

            all_success = True
            for status in statuses:
                #time.sleep(sleep_span)
                pics = status.get('pic_urls')
                if pics is None or len(pics) == 0:
                    continue

                mid = str(status['id'])

                i_pic = 0
                for item in pics:
                    pic = item.pop('thumbnail_pic')
                    pic_url = pic.replace('thumbnail', 'large')
                    # ind points just past the last '/'; slice from there to
                    # keep the full file name.
                    ind = pic_url.rindex('/') + 1
                    alias = '%s_%d_%s' % (mid, i_pic, pic_url[ind:])
                    success = util.store_image(pic_url, pic_folder + alias)
                    all_success &= success
                    i_pic += 1  # advance the per-status picture index

                print('  Download Pictures [%s].[%s]->[%d]' %
                      (uid, mid, len(pics)))

            if all_success:
                with codecs.open(pic_folder + 'done', 'w',
                                 encoding='utf-8') as fp:
                    fp.write('done')
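
For reference, a plausible sketch of the util.store_image helper both versions rely on. This is an assumption, not the project's actual code; the real helper may add retries, throttling, or API authentication. It returns a bool, matching the `all_success &= success` bookkeeping above.

import urllib2  # Python 2, matching the print statements in this project

def store_image(url, fpath):
    # Fetch url and write the raw bytes to fpath; report success as a bool.
    try:
        data = urllib2.urlopen(url, timeout=30).read()
        with open(fpath, 'wb') as f:
            f.write(data)
        return True
    except Exception:
        return False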
Code Example #3
File: dirty.py Project: radio-astro/gyimager
def dirty(options):
    # Create the data processor. The data processor is an abstraction over
    # different gridding / degridding algorithms. The idea is that the data
    # processor transforms from image to visibilities and vice versa. The rest
    # of the code only works on images and does not (need to) access visibility
    # data.
    #
    # Several implementations of the data processor interface (see
    # processors/data_processor_base.py) are available. The idea is to have
    # optimized implementations for specific cases, as well as (possibly slower)
    # generic implementations.
    #
    # TODO: Need to create a smaller set of options that are required when the
    # data processor is instantiated. For example, to create an empty image,
    # details about the weighting scheme are not important.
    #
    max_baseline = options.max_baseline if options.max_baseline > 0.0 else \
        10000.0
    processor_options = {}
    processor_options["processor"] = options.processor
    processor_options["w_max"] = max_baseline
    processor_options["padding"] = 1.0
    processor_options["image"] = options.image
    processor_options["threads"] = options.threads
    processor_options["weighttype"] = options.weighttype
    processor_options["rmode"] = options.rmode
    processor_options["noise"] = options.noise
    processor_options["robustness"] = options.robustness
    processor_options["profile"] = options.profile
    processor_options["chunksize"] = options.chunksize

    processor_options["gridding.ATerm.name"] = "ATermPython"
    processor_options["ATermPython.module"] = "imager.myaterm"
    processor_options["ATermPython.class"] = "MyATerm"

    processor = processors.create_data_processor(options.ms, processor_options)

    channel_freq = processor.channel_frequency()
    channel_width = processor.channel_width()

    # Estimate the size of the image in radians, based on an estimate of the
    # FWHM of the station beam, assuming a station diameter of 70 meters.
    max_freq = numpy.max(channel_freq)
    image_size = 2.0 * util.full_width_half_max(70.0, max_freq)

    # TODO: Cyril mentioned above image size estimation is too conservative.
    # Need to check this and find a better estimate if necessary. For now, will
    # just multiply estimated FOV by 2.0.
    image_size *= 2.0

    # Estimate the number of pixels and the pixel size in radians such that
    # the image is sampled at approximately 3 pixels per beam.
    (n_px, delta_px) = util.image_configuration(image_size, max_freq,
        max_baseline)

    util.notice("image configuration:")
    util.notice("    size: %d x %d pixel" % (n_px, n_px))
    util.notice("    angular size: %.2f deg" % (image_size * 180.0 / numpy.pi))
    util.notice("    angular resolution @ 3 pixel/beam: %.2f arcsec/pixel"
        % (3600.0 * delta_px * 180.0 / numpy.pi))

    # Create an empty image. For the moment, the implementation is limited to
    # single channel images.
    image_shape = (1, 4, n_px, n_px)
    image_coordinates = pyrap.images.coordinates.coordinatesystem(
        casaimwrap.make_coordinate_system(image_shape[2:], [delta_px,
        delta_px], processor.phase_reference(), channel_freq, channel_width))

    # Call the data processor to grid the visibility data (i.e. compute the
    # dirty image).
    util.notice("creating dirty image...")
    tab = pyrap.tables.table(options.ms)
    nrows = tab.nrows()
    tab.close()
    print "There are ", nrows, " rows in the MS..."
    if options.chunksize > 0 and options.chunksize <= nrows:
      print('calling grid_chunk...')
      dirty_image, _ = processor.grid_chunk(image_coordinates, image_shape,
        processors.Normalization.FLAT_NOISE, options.chunksize)
    else:
      print('calling grid...')
      dirty_image, _ = processor.grid(image_coordinates, image_shape,
        processors.Normalization.FLAT_NOISE)

    # Store output images. Only the flat noise image is stored here.
    util.notice("storing dirty images...")
    util.store_image(options.image + ".dirty.flat_noise",
        image_coordinates, dirty_image)
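
The image-size estimate above hinges on util.full_width_half_max. A plausible sketch, assuming a diffraction-limited beam with FWHM of roughly lambda / D radians; the project's actual helper may use a different beam model.

def full_width_half_max(diameter, freq):
    # FWHM in radians of an aperture of `diameter` meters observing at
    # `freq` Hz, using the diffraction-limited approximation lambda / D.
    c = 299792458.0  # speed of light in m/s
    return (c / freq) / diameter

# e.g. a 70 m station at 150 MHz gives a beam of roughly 1.6 degrees, so
# image_size starts at about 3.3 degrees before the extra factor of 2.0.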
Code Example #4
File: mfclean.py Project: radio-astro/gyimager
def mfclean(options):
    clark_options = {}
    clark_options["gain"] = options.gain
    clark_options["iterations"] = options.iterations
    clark_options["cycle_speedup"] = options.cycle_speedup

    max_baseline = options.max_baseline if options.max_baseline > 0.0 else \
        10000.0

    processor_options = {}
    processor_options["processor"] = options.processor
    processor_options["w_max"] = max_baseline
    processor_options["padding"] = 1.0
    processor_options["image"] = options.image
    processor_options["threads"] = options.threads
    processor_options["weighttype"] = options.weighttype
    processor_options["rmode"] = options.rmode
    processor_options["noise"] = options.noise
    processor_options["robustness"] = options.robustness
    processor_options["profile"] = options.profile
    processor = processors.create_data_processor(options.ms, processor_options)

    channel_freq = processor.channel_frequency()
    channel_width = processor.channel_width()

    max_freq = numpy.max(channel_freq)
    image_size = 2.0 * util.full_width_half_max(70.0, max_freq)

    # TODO: Cyril mentioned above image size estimation is too conservative.
    # Need to check this and find a better estimate if necessary. For now, will
    # just multiply estimated FOV by 2.0.
    image_size *= 2.0

    (n_px, delta_px) = util.image_configuration(image_size, max_freq,
        max_baseline)

    util.notice("image configuration:")
    util.notice("    size: %d x %d pixel" % (n_px, n_px))
    util.notice("    angular size: %.2f deg"
        % (image_size * 180.0 / numpy.pi))
    util.notice("    angular resolution @ 3 pixel/beam: %.2f arcsec/pixel"
        % (3600.0 * delta_px * 180.0 / numpy.pi))

    # TODO: Need to implement support for multiple channel images. Currently,
    # all data channels are combined into a single MFS image per correlation.
    image_shape = (1, 4, n_px, n_px)
    image_coordinates = pyrap.images.coordinates.coordinatesystem(
        casaimwrap.make_coordinate_system(image_shape[2:], [delta_px,
        delta_px], processor.phase_reference(), channel_freq, channel_width))

    n_model = 1
    # TODO: Check code for n_model > 1!
    assert(n_model == 1)

    # Comment from CASA source code:
    #
    # Set to search for peak in I^2+Q^2+U^2+V^2 domain or each stokes plane
    # separately. Ignored for hogbom and msclean for now.
#    join_stokes = False
    join_stokes = True

    # Compute approximate PSFs.
    util.notice("computing approximate point spread functions...")
    psf = [None for i in range(n_model)]
    beam = [None for i in range(n_model)]
    for i in range(n_model):
        psf[i] = processor.point_spread_function(image_coordinates, image_shape)
        fit = casaimwrap.fit_gaussian_psf(image_coordinates.dict(),
            psf[i])
        assert(fit["ok"])

        beam[i] = BeamParameters((fit["major"] * numpy.pi) / (3600.0 * 180.0),
            (fit["minor"] * numpy.pi) / (3600.0 * 180.0), (fit["angle"]
            * numpy.pi) / 180.0)

        util.notice("model %d/%d: major axis: %f arcsec, minor axis: %f arcsec,"
            " position angle: %f deg" % (i, n_model - 1, abs(fit["major"]),
            abs(fit["minor"]), fit["angle"]))

    # Validate PSFs.
    (min_psf, max_psf, max_psf_outer, psf_patch_size, max_sidelobe) = \
        validate_psf(image_coordinates, psf, beam)
    clark_options["psf_patch_size"] = psf_patch_size

    updated = [False for i in range(n_model)]
    weight = [None for i in range(n_model)]
    model = [numpy.zeros(image_shape) for i in range(n_model)]
    delta = [numpy.zeros(image_shape) for i in range(n_model)]
    residual = [numpy.zeros(image_shape) for i in range(n_model)]

    if join_stokes:
        iterations = numpy.zeros((n_model, 1, image_shape[0]))
        stokes = ["JOINT"]
        cr_slices = [slice(None)]
    else:
        iterations = numpy.zeros((n_model, image_shape[1], image_shape[0]))
        stokes = image_coordinates.get_coordinate("stokes").get_stokes()
        cr_slices = [slice(i, i + 1) for i in range(4)]

    cycle = 0
    diverged = False
    absmax = options.threshold
    previous_absmax = 1e30

    while absmax >= options.threshold and numpy.max(iterations) \
        < options.iterations and (cycle == 0 or any(updated)):

        util.notice(">> starting major cycle: %d <<" % cycle)

        # Comment from CASA source code:
        #
        # Make the residual images. We do an incremental update for cycles after
        # the first one. If we have only one model then we use convolutions to
        # speed the processing
        util.notice("computing residuals...")

        # TODO: If n_models > 1, need to compute residuals from the sum of
        # the degridded visibilities (see LofarCubeSkyEquation.cc).
        assert(n_model == 1)
        if cycle == 0:
            # Assuming the initial models are zero, the residual visibilities
            # equal the observed visibilities and therefore we only need to
            # grid them.
            for i in range(n_model):
                residual[i], weight[i] = processor.grid(image_coordinates,
                    image_shape, processors.Normalization.FLAT_NOISE)
        else:
            for i in range(n_model):
                if updated[i]:
                    residual[i], weight[i] = \
                        processor.residual(image_coordinates, model[i],
                            processors.Normalization.FLAT_NOISE,
                            processors.Normalization.FLAT_NOISE)
                updated[i] = False

        # Compute residual statistics.
        (absmax, resmin, resmax) = max_field(residual, weight)

        # Print some statistics.
        for i in range(n_model):
            util.notice("model %d/%d: min residual: %f, max residual: %f"
                % (i, n_model - 1, resmin[i], resmax[i]))
        util.notice("peak residual: %f" % absmax)

        # Comment from CASA source code:
        #
        # Check if absmax is 5% above its previous value.
        #
        # TODO: Value used does not look like 5%?
        if absmax >= 1.000005 * previous_absmax:
            diverged = True
            break

        # Store absmax of this major cycle for later reference.
        previous_absmax = absmax

        # Check stop criterion.
        if absmax < options.threshold:
            break

        # TODO: What is this really used for? And does the max weight indeed
        # correspond to sensitivity in Jy/beam?
        if cycle == 0:
            max_weight = 0.0
            for i in range(n_model):
                max_weight = max(max_weight, numpy.max(weight[i]))
            util.notice("maximum sensitivity: %f Jy/beam" % (1.0
                / numpy.sqrt(max_weight)))

        # Comment from CASA source code:
        #
        # Calculate the threshold for this cycle. Add a safety factor
        #
        # fractionOfPsf controls how deep the cleaning should go.
        # There are two user-controls.
        # cycleFactor_p : scale factor for the PSF sidelobe level.
        #                        1 : clean down to the psf sidelobe level
        #                        <1 : go deeper
        #                        >1 : shallower : stop sooner.
        #                        Default : 1.5
        # cycleMaxPsfFraction_p : scale factor as a fraction of the PSF peak
        #                                    must be 0.0 < xx < 1.0 (obviously)
        #                                    Default : 0.8
        fraction_of_psf = min(options.cycle_max_psf_fraction,
            options.cycle_factor * max_sidelobe)

        if fraction_of_psf > 0.8:
            util.warning("PSF fraction for threshold computation is too"
                " high: %f. Forcing to 0.8 to ensure that the threshold is"
                " smaller than the peak residual!" % fraction_of_psf)
            fraction_of_psf = 0.8   # painfully slow!

        # Update cycle threshold.
        cycle_threshold = max(0.95 * options.threshold, fraction_of_psf
            * absmax)
        clark_options["cycle_threshold"] = cycle_threshold

        util.notice("minor cycle threshold max(0.95 * %f, peak residual * %f):"
            " %f" % (options.threshold, fraction_of_psf, cycle_threshold))

        # Execute the minor cycle (Clark clean) for each channel of each model.
        util.notice("starting minor cycle...")
        for i in range(n_model):
            if max(abs(resmin[i]), abs(resmax[i])) < cycle_threshold:
                util.notice("model %d/%d: peak residual below threshold"
                    % (i, n_model - 1))
                continue

            if max_psf[i] <= 0.0:
                util.warning("model %d/%d: point spread function negative or"
                    " zero" % (i, n_model - 1))
                continue

            # Zero the delta image for this model.
            delta[i].fill(0.0)

            for (cr, cr_slice) in enumerate(cr_slices):
                for ch in range(len(residual[i])):
                    # TODO: The value of max_weight is only updated during
                    # cycle 0. Is this correct?
                    #
                    assert(len(weight[i].shape) == 2
                        and weight[i].shape[:2] == residual[i].shape[:2])

                    plane_weight = numpy.sqrt(weight[i][ch, cr_slice]
                        / max_weight)
                    if numpy.any(plane_weight > 0.01):
                        weight_mask = numpy.ones((residual[i].shape[2:]))
                    else:
                        weight_mask = numpy.zeros((residual[i].shape[2:]))

                    # Call CASA Clark clean implementation (minor cycle).
                    # TODO: When cleaning each Stokes parameter separately,
                    # the PSF of Stokes I is used for all others as well?
                    #
                    # Comment from CASA source code:
                    #
                    # We only want the PSF for the first polarization so we
                    # iterate over polarization LAST.
                    #
                    result = casaimwrap.clark_clean(psf[i][ch,0,:,:],
                        residual[i][ch,cr_slice,:,:], weight_mask,
                        iterations[i,cr,ch], clark_options)

                    if result["iterations"] > iterations[i,cr,ch]:
                        updated[i] = True
                        delta[i][ch,cr_slice,:,:] = result["delta"]
                        iterations[i,cr,ch] = result["iterations"]
                    else:
                        assert(numpy.all(result["delta"] == 0.0))

                util.notice("model %d/%d: stokes: %s, cleaned: %f Jy, "
                    "iterations per channel: %s" % (i, n_model - 1,
                    stokes[cr], numpy.sum(delta[i][ch,cr_slice,:,:]),
                    str(iterations[i,cr,:])))

        # Update model images if required.
        for i in range(n_model):
            if updated[i]:
                model[i] += delta[i]

        # Update major cycle counter.
        cycle += 1

    if any(updated):
        util.notice("finalizing residual images for all fields...")
        for i in range(n_model):
            if updated[i]:
                residual[i], weight[i] = processor.residual(image_coordinates,
                    model[i], processors.Normalization.FLAT_NOISE,
                    processors.Normalization.FLAT_NOISE)
        (absmax, resmin, resmax) = max_field(residual, weight)

        # Print some statistics.
        for i in range(n_model):
            util.notice("model %d/%d: min residual: %f, max residual: %f"
                % (i, n_model - 1, resmin[i], resmax[i]))
        util.notice("peak residual: %f" % absmax)
    else:
        util.notice("residual images for all fields are up-to-date...")

    # Store output images.
    util.notice("storing average response...")
    util.store_image(options.image + ".response", image_coordinates,
        processor.response(image_coordinates, image_shape))

    util.notice("storing model images...")
    for i in range(n_model):
        util.store_image(options.image + ".model.flat_noise",
            image_coordinates, model[i])
        util.store_image(options.image + ".model", image_coordinates,
            processor.normalize(image_coordinates, model[i],
            processors.Normalization.FLAT_NOISE,
            processors.Normalization.FLAT_GAIN))

    util.notice("storing residual images...")
    for i in range(n_model):
        util.store_image(options.image + ".residual.flat_noise",
            image_coordinates, residual[i])
        util.store_image(options.image + ".residual", image_coordinates,
            processor.normalize(image_coordinates, residual[i],
            processors.Normalization.FLAT_NOISE,
            processors.Normalization.FLAT_GAIN))

    util.notice("storing restored images...")
    for i in range(n_model):
        restored = restore_image(image_coordinates.dict(), model[i],
            residual[i], beam[i])

        util.store_image(options.image + ".restored.flat_noise",
            image_coordinates, restored)
        util.store_image(options.image + ".restored", image_coordinates,
            processor.normalize(image_coordinates, restored,
            processors.Normalization.FLAT_NOISE,
            processors.Normalization.FLAT_GAIN))

    # Print some statistics.
    for i in range(n_model):
        util.notice("model %d/%d: clean flux: %f, residual rms: %f" % (i,
            n_model - 1, numpy.sum(model[i]), numpy.std(residual[i])))

    if diverged:
        util.error("clean diverged.")
    elif absmax < options.threshold:
        util.notice("clean converged.")
    else:
        util.warning("clean did not reach threshold: %f Jy."
            % options.threshold)
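
The per-cycle threshold logic above is self-contained enough to restate as a small function. A sketch with local names, using the CASA defaults quoted in the comments (cycle_factor 1.5, cycle_max_psf_fraction 0.8):

def minor_cycle_threshold(threshold, absmax, max_sidelobe,
                          cycle_factor=1.5, cycle_max_psf_fraction=0.8):
    # Clean down to a fraction of the peak residual set by the PSF sidelobe
    # level, but never below (almost) the user threshold.
    fraction_of_psf = min(cycle_max_psf_fraction,
                          cycle_factor * max_sidelobe)
    if fraction_of_psf > 0.8:
        fraction_of_psf = 0.8
    return max(0.95 * threshold, fraction_of_psf * absmax)

# e.g. a 2.0 Jy peak residual with 10% sidelobes and a 0.01 Jy user
# threshold yields a cycle threshold of 0.3 Jy.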
Code Example #5
def dirty(options):
    # Create the data processor. The data processor is an abstraction over
    # different gridding / degridding algorithms. The idea is that the data
    # processor transforms from image to visibilities and vice versa. The rest
    # of the code only works on images and does not (need to) access visibility
    # data.
    #
    # Several implementations of the data processor interface (see
    # processors/data_processor_base.py) are available. The idea is to have
    # optimized implementations for specific cases, as well as (possibly slower)
    # generic implementations.
    #
    # TODO: Need to create a smaller set of options that are required when the
    # data processor is instantiated. For example, to create an empty image,
    # details about the weighting scheme are not important.
    #
    max_baseline = options.max_baseline if options.max_baseline > 0.0 else \
        10000.0
    processor_options = {}
    processor_options["processor"] = options.processor
    processor_options["w_max"] = max_baseline
    processor_options["padding"] = 1.0
    processor_options["ms"] = options.ms
    processor_options["image"] = options.image
    processor_options["threads"] = options.threads
    processor_options["weighttype"] = options.weighttype
    processor_options["rmode"] = options.rmode
    processor_options["noise"] = options.noise
    processor_options["robustness"] = options.robustness
    processor_options["profile"] = options.profile
    processor_options["chunksize"] = options.chunksize
    processor_options["outcol"] = options.outcol
    processor_options["beamname"] = options.beamname

    #processor_options["gridding.ATerm.name"] = "ATermLofar"
    processor_options["gridding.ATerm.name"] = "ATermPython"
    processor_options["ATermPython.module"] = "lofar.imager.myaterm"
    processor_options["ATermPython.class"] = "MyATerm"

    processor = processors.create_data_processor(options.ms, processor_options)

    channel_freq = processor.channel_frequency()
    channel_width = processor.channel_width()

    # Estimate the size of the image in radians, based on an estimate of the
    # FWHM of the station beam, assuming a station diameter of 70 meters.
    max_freq = numpy.max(channel_freq)
    image_size = 2.0 * util.full_width_half_max(70.0, max_freq)

    # TODO: Cyril mentioned above image size estimation is too conservative.
    # Need to check this and find a better estimate if necessary. For now, will
    # just multiply estimated FOV by 2.0.
    image_size *= 2.0

    # Estimate the number of pixels and the pixel size in radians such that
    # the image is sampled at approximately 3 pixels per beam.
    (n_px, delta_px) = util.image_configuration(image_size, max_freq,
                                                max_baseline)

    util.notice("image configuration:")
    util.notice("    size: %d x %d pixel" % (n_px, n_px))
    util.notice("    angular size: %.2f deg" % (image_size * 180.0 / numpy.pi))
    util.notice("    angular resolution @ 3 pixel/beam: %.2f arcsec/pixel" %
                (3600.0 * delta_px * 180.0 / numpy.pi))

    # Create an empty image. For the moment, the implementation is limited to
    # single channel images.
    image_shape = (1, 4, n_px, n_px)
    image_coordinates = pyrap.images.coordinates.coordinatesystem(
        lofar.casaimwrap.make_coordinate_system(image_shape[2:],
                                                [delta_px, delta_px],
                                                processor.phase_reference(),
                                                channel_freq, channel_width))

    # Call the data processor to grid the visibility data (i.e. compute the
    # dirty image).
    util.notice("creating dirty image...")
    tab = pyrap.tables.table(options.ms)
    nrows = tab.nrows()
    tab.close()
    print "There are ", nrows, " rows in the MS..."
    if options.chunksize > 0 and options.chunksize <= nrows:
        print('calling grid_chunk...')
        dirty_image, _ = processor.grid_chunk(
            image_coordinates, image_shape,
            processors.Normalization.FLAT_NOISE, options.chunksize)
    else:
        print('calling grid...')
        dirty_image, _ = processor.grid(image_coordinates, image_shape,
                                        processors.Normalization.FLAT_NOISE)

    # Store output images. Only the flat noise image is stored here.
    util.notice("storing dirty images...")
    util.store_image(options.image + ".dirty.flat_noise", image_coordinates,
                     dirty_image)
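
A minimal sketch of how dirty() might be driven; the option names are taken from the attribute accesses above, while every value here is hypothetical.

from argparse import Namespace

options = Namespace(
    ms='L123456_SB000.MS',      # hypothetical MeasurementSet path
    image='out',                # prefix for the stored images
    processor='generic',        # hypothetical processor implementation name
    max_baseline=0.0,           # <= 0.0 falls back to 10000.0 above
    threads=1, weighttype='natural', rmode='none', noise=0.0,
    robustness=0.0, profile=False, chunksize=0, outcol='', beamname='')
dirty(options)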
Code Example #6
def mfclean(options):
    clark_options = {}
    clark_options["gain"] = options.gain
    clark_options["iterations"] = options.iterations
    clark_options["cycle_speedup"] = options.cycle_speedup

    max_baseline = options.max_baseline if options.max_baseline > 0.0 else \
        10000.0

    processor_options = {}
    processor_options["processor"] = options.processor
    processor_options["w_max"] = max_baseline
    processor_options["padding"] = 1.0
    processor_options["image"] = options.image
    processor_options["threads"] = options.threads
    processor_options["weighttype"] = options.weighttype
    processor_options["rmode"] = options.rmode
    processor_options["noise"] = options.noise
    processor_options["robustness"] = options.robustness
    processor_options["profile"] = options.profile
    processor = processors.create_data_processor(options.ms, processor_options)

    channel_freq = processor.channel_frequency()
    channel_width = processor.channel_width()

    max_freq = numpy.max(channel_freq)
    image_size = 2.0 * util.full_width_half_max(70.0, max_freq)

    # TODO: Cyril mentioned above image size estimation is too conservative.
    # Need to check this and find a better estimate if necessary. For now, will
    # just multiply estimated FOV by 2.0.
    image_size *= 2.0

    (n_px, delta_px) = util.image_configuration(image_size, max_freq,
                                                max_baseline)

    util.notice("image configuration:")
    util.notice("    size: %d x %d pixel" % (n_px, n_px))
    util.notice("    angular size: %.2f deg" % (image_size * 180.0 / numpy.pi))
    util.notice("    angular resolution @ 3 pixel/beam: %.2f arcsec/pixel" %
                (3600.0 * delta_px * 180.0 / numpy.pi))

    # TODO: Need to implement support for multiple channel images. Currently,
    # all data channels are combined into a single MFS image per correlation.
    image_shape = (1, 4, n_px, n_px)
    image_coordinates = pyrap.images.coordinates.coordinatesystem(
        casaimwrap.make_coordinate_system(image_shape[2:],
                                          [delta_px, delta_px],
                                          processor.phase_reference(),
                                          channel_freq, channel_width))

    n_model = 1
    # TODO: Check code for n_model > 1!
    assert (n_model == 1)

    # Comment from CASA source code:
    #
    # Set to search for peak in I^2+Q^2+U^2+V^2 domain or each stokes plane
    # separately. Ignored for hogbom and msclean for now.
    #    join_stokes = False
    join_stokes = True

    # Compute approximate PSFs.
    util.notice("computing approximate point spread functions...")
    psf = [None for i in range(n_model)]
    beam = [None for i in range(n_model)]
    for i in range(n_model):
        psf[i] = processor.point_spread_function(image_coordinates,
                                                 image_shape)
        fit = casaimwrap.fit_gaussian_psf(image_coordinates.dict(), psf[i])
        assert (fit["ok"])

        beam[i] = BeamParameters((fit["major"] * numpy.pi) / (3600.0 * 180.0),
                                 (fit["minor"] * numpy.pi) / (3600.0 * 180.0),
                                 (fit["angle"] * numpy.pi) / 180.0)

        util.notice(
            "model %d/%d: major axis: %f arcsec, minor axis: %f arcsec,"
            " position angle: %f deg" % (i, n_model - 1, abs(
                fit["major"]), abs(fit["minor"]), fit["angle"]))

    # Validate PSFs.
    (min_psf, max_psf, max_psf_outer, psf_patch_size, max_sidelobe) = \
        validate_psf(image_coordinates, psf, beam)
    clark_options["psf_patch_size"] = psf_patch_size

    updated = [False for i in range(n_model)]
    weight = [None for i in range(n_model)]
    model = [numpy.zeros(image_shape) for i in range(n_model)]
    delta = [numpy.zeros(image_shape) for i in range(n_model)]
    residual = [numpy.zeros(image_shape) for i in range(n_model)]

    if join_stokes:
        iterations = numpy.zeros((n_model, 1, image_shape[0]))
        stokes = ["JOINT"]
        cr_slices = [slice(None)]
    else:
        iterations = numpy.zeros((n_model, image_shape[1], image_shape[0]))
        stokes = image_coordinates.get_coordinate("stokes").get_stokes()
        cr_slices = [slice(i, i + 1) for i in range(4)]

    cycle = 0
    diverged = False
    absmax = options.threshold
    previous_absmax = 1e30

    while absmax >= options.threshold and numpy.max(iterations) \
        < options.iterations and (cycle == 0 or any(updated)):

        util.notice(">> starting major cycle: %d <<" % cycle)

        # Comment from CASA source code:
        #
        # Make the residual images. We do an incremental update for cycles after
        # the first one. If we have only one model then we use convolutions to
        # speed the processing
        util.notice("computing residuals...")

        # TODO: If n_models > 1, need to compute residuals from the sum of
        # the degridded visibilities (see LofarCubeSkyEquation.cc).
        assert (n_model == 1)
        if cycle == 0:
            # Assuming the initial models are zero, the residual visibilities
            # equal the observed visibilities and therefore we only need to
            # grid them.
            for i in range(n_model):
                residual[i], weight[i] = processor.grid(
                    image_coordinates, image_shape,
                    processors.Normalization.FLAT_NOISE)
        else:
            for i in range(n_model):
                if updated[i]:
                    residual[i], weight[i] = \
                        processor.residual(image_coordinates, model[i],
                            processors.Normalization.FLAT_NOISE,
                            processors.Normalization.FLAT_NOISE)
                updated[i] = False

        # Compute residual statistics.
        (absmax, resmin, resmax) = max_field(residual, weight)

        # Print some statistics.
        for i in range(n_model):
            util.notice("model %d/%d: min residual: %f, max residual: %f" %
                        (i, n_model - 1, resmin[i], resmax[i]))
        util.notice("peak residual: %f" % absmax)

        # Comment from CASA source code:
        #
        # Check if absmax is 5% above its previous value.
        #
        # TODO: Value used does not look like 5%?
        if absmax >= 1.000005 * previous_absmax:
            diverged = True
            break

        # Store absmax of this major cycle for later reference.
        previous_absmax = absmax

        # Check stop criterion.
        if absmax < options.threshold:
            break

        # TODO: What is this really used for? And does the max weight indeed
        # correspond to sensitivity in Jy/beam?
        if cycle == 0:
            max_weight = 0.0
            for i in range(n_model):
                max_weight = max(max_weight, numpy.max(weight[i]))
            util.notice("maximum sensitivity: %f Jy/beam" %
                        (1.0 / numpy.sqrt(max_weight)))

        # Comment from CASA source code:
        #
        # Calculate the threshold for this cycle. Add a safety factor
        #
        # fractionOfPsf controls how deep the cleaning should go.
        # There are two user-controls.
        # cycleFactor_p : scale factor for the PSF sidelobe level.
        #                        1 : clean down to the psf sidelobe level
        #                        <1 : go deeper
        #                        >1 : shallower : stop sooner.
        #                        Default : 1.5
        # cycleMaxPsfFraction_p : scale factor as a fraction of the PSF peak
        #                                    must be 0.0 < xx < 1.0 (obviously)
        #                                    Default : 0.8
        fraction_of_psf = min(options.cycle_max_psf_fraction,
                              options.cycle_factor * max_sidelobe)

        if fraction_of_psf > 0.8:
            util.warning(
                "PSF fraction for threshold computation is too"
                " high: %f. Forcing to 0.8 to ensure that the threshold is"
                " smaller than the peak residual!" % fraction_of_psf)
            fraction_of_psf = 0.8  # painfully slow!

        # Update cycle threshold.
        cycle_threshold = max(0.95 * options.threshold,
                              fraction_of_psf * absmax)
        clark_options["cycle_threshold"] = cycle_threshold

        util.notice("minor cycle threshold max(0.95 * %f, peak residual * %f):"
                    " %f" %
                    (options.threshold, fraction_of_psf, cycle_threshold))

        # Execute the minor cycle (Clark clean) for each channel of each model.
        util.notice("starting minor cycle...")
        for i in range(n_model):
            if max(abs(resmin[i]), abs(resmax[i])) < cycle_threshold:
                util.notice("model %d/%d: peak residual below threshold" %
                            (i, n_model - 1))
                continue

            if max_psf[i] <= 0.0:
                util.warning("model %d/%d: point spread function negative or"
                             " zero" % (i, n_model - 1))
                continue

            # Zero the delta image for this model.
            delta[i].fill(0.0)

            for (cr, cr_slice) in enumerate(cr_slices):
                for ch in range(len(residual[i])):
                    # TODO: The value of max_weight is only updated during
                    # cycle 0. Is this correct?
                    #
                    assert (len(weight[i].shape) == 2
                            and weight[i].shape[:2] == residual[i].shape[:2])

                    plane_weight = numpy.sqrt(weight[i][ch, cr_slice] /
                                              max_weight)
                    if numpy.any(plane_weight > 0.01):
                        weight_mask = numpy.ones((residual[i].shape[2:]))
                    else:
                        weight_mask = numpy.zeros((residual[i].shape[2:]))

                    # Call CASA Clark clean implementation (minor cycle).
                    # TODO: When cleaning each Stokes parameter separately,
                    # the PSF of Stokes I is used for all others as well?
                    #
                    # Comment from CASA source code:
                    #
                    # We only want the PSF for the first polarization so we
                    # iterate over polarization LAST.
                    #
                    result = casaimwrap.clark_clean(
                        psf[i][ch, 0, :, :], residual[i][ch, cr_slice, :, :],
                        weight_mask, iterations[i, cr, ch], clark_options)

                    if result["iterations"] > iterations[i, cr, ch]:
                        updated[i] = True
                        delta[i][ch, cr_slice, :, :] = result["delta"]
                        iterations[i, cr, ch] = result["iterations"]
                    else:
                        assert (numpy.all(result["delta"] == 0.0))

                util.notice("model %d/%d: stokes: %s, cleaned: %f Jy, "
                            "iterations per channel: %s" %
                            (i, n_model - 1, stokes[cr],
                             numpy.sum(delta[i][ch, cr_slice, :, :]),
                             str(iterations[i, cr, :])))

        # Update model images if required.
        for i in range(n_model):
            if updated[i]:
                model[i] += delta[i]

        # Update major cycle counter.
        cycle += 1

    if any(updated):
        util.notice("finalizing residual images for all fields...")
        for i in range(n_model):
            if updated[i]:
                residual[i], weight[i] = processor.residual(
                    image_coordinates, model[i],
                    processors.Normalization.FLAT_NOISE,
                    processors.Normalization.FLAT_NOISE)
        (absmax, resmin, resmax) = max_field(residual, weight)

        # Print some statistics.
        for i in range(n_model):
            util.notice("model %d/%d: min residual: %f, max residual: %f" %
                        (i, n_model - 1, resmin[i], resmax[i]))
        util.notice("peak residual: %f" % absmax)
    else:
        util.notice("residual images for all fields are up-to-date...")

    # Store output images.
    util.notice("storing average response...")
    util.store_image(options.image + ".response", image_coordinates,
                     processor.response(image_coordinates, image_shape))

    util.notice("storing model images...")
    for i in range(n_model):
        util.store_image(options.image + ".model.flat_noise",
                         image_coordinates, model[i])
        util.store_image(
            options.image + ".model", image_coordinates,
            processor.normalize(image_coordinates, model[i],
                                processors.Normalization.FLAT_NOISE,
                                processors.Normalization.FLAT_GAIN))

    util.notice("storing residual images...")
    for i in range(n_model):
        util.store_image(options.image + ".residual.flat_noise",
                         image_coordinates, residual[i])
        util.store_image(
            options.image + ".residual", image_coordinates,
            processor.normalize(image_coordinates, residual[i],
                                processors.Normalization.FLAT_NOISE,
                                processors.Normalization.FLAT_GAIN))

    util.notice("storing restored images...")
    for i in range(n_model):
        restored = restore_image(image_coordinates.dict(), model[i],
                                 residual[i], beam[i])

        util.store_image(options.image + ".restored.flat_noise",
                         image_coordinates, restored)
        util.store_image(
            options.image + ".restored", image_coordinates,
            processor.normalize(image_coordinates, restored,
                                processors.Normalization.FLAT_NOISE,
                                processors.Normalization.FLAT_GAIN))

    # Print some statistics.
    for i in range(n_model):
        util.notice(
            "model %d/%d: clean flux: %f, residual rms: %f" %
            (i, n_model - 1, numpy.sum(model[i]), numpy.std(residual[i])))

    if diverged:
        util.error("clean diverged.")
    elif absmax < options.threshold:
        util.notice("clean converged.")
    else:
        util.warning("clean did not reach threshold: %f Jy." %
                     options.threshold)