Example #1
def retrieve_data_csv(folder, number):
    """
    For reading csv data set files.

    :param folder: Name of data set folder.
    :param number: Number of files to read.

    :return: The concatenated catalogue with epoch numbers for all sources included.
    """

    run_partials = False

    os.chdir(folder)
    files = glob.glob("*.csv")[:number]  # first `number` csv files found
    print(files)
    epoch = 0

    # load tables
    for filename in files:
        tab = table.Table.read(filename)

        # rename columns
        tab.rename_column("# src", "source")
        tab.rename_column("RA(deg)", "ra")
        tab.rename_column("err_RA(deg)", "err_ra")
        tab.rename_column("Dec(deg)", "dec")
        tab.rename_column("err_Dec(deg)", "err_dec")
        tab.rename_column("Flux(Jy)", "peak_flux")
        tab.rename_column("err_Flux(Jy)", "err_peak_flux")

        # label every row with its epoch number (stored in the 'island' column)
        tab['island'] = epoch

        if epoch == 0:
            frames = tab
            base_length = len(tab)
        else:
            frames = vstack([frames, tab])
            if len(tab) != base_length:
                run_partials = True
        epoch += 1

    print(frames)
    write_table(frames, "./results/frames%d.csv" % number)

    cat = table_to_source_list(frames)
    print(cat)

    return {'cat': cat, 'run_partial': run_partials}
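
A minimal sketch of the epoch-labelling and stacking pattern used above, with in-memory astropy tables instead of files (the column values here are invented for illustration):

from astropy import table
from astropy.table import vstack

# two fake "epochs" of the same two sources
epoch0 = table.Table({'source': [0, 1], 'peak_flux': [1.2, 0.8]})
epoch1 = table.Table({'source': [0, 1], 'peak_flux': [1.3, 0.7]})

frames = None
for epoch, tab in enumerate([epoch0, epoch1]):
    tab['island'] = epoch  # tag every row with its epoch number
    frames = tab if frames is None else vstack([frames, tab])

print(frames)  # four rows, with the 'island' column separating the epochs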
Example #2
def retrieve_data_fits(folder, number):
    """
    For reading fits data set files.

    :param folder: Name of data set folder.
    :param number: Number of files to read.

    :return: The concatenated catalogue with epoch numbers for all sources included.
    """
    # uses astropy.io.fits via astropy.table

    run_partials = False

    os.chdir(folder)
    files = glob.glob("*.fits")[:number]  # first `number` fits files found
    print(files)
    epoch = 0

    # load tables
    for filename in files:
        tab = table.Table.read(filename)

        # label rows with the epoch number and a per-epoch source index
        tab['island'] = epoch
        tab['source'][:] = np.arange(len(tab))

        if epoch == 0:
            frames = tab
            base_length = len(tab)
        else:
            frames = vstack([frames, tab])
            if len(tab) != base_length:
                run_partials = True
        epoch += 1

    print(frames)
    write_table(frames, "./results/frames%d.csv" % number)

    cat = table_to_source_list(frames)
    print(cat)

    return {'cat': cat, 'run_partial': run_partials}
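
Both helpers above lean on module-level utilities (load_table, write_table, table_to_source_list) that the listing does not show. A hypothetical stand-in for table_to_source_list, just to make the snippets runnable in isolation; the attribute set is inferred from how sources are used in the later examples:

from types import SimpleNamespace

def table_to_source_list(tab):
    # hypothetical stand-in: wrap each table row in an object that exposes
    # the columns (ra, dec, peak_flux, island, source, ...) as attributes
    return [SimpleNamespace(**dict(zip(tab.colnames, row))) for row in tab]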
Example #3
def group_iter(catalog, eps, min_members=1):
    """
    :param catalog: List of sources, or filename of a catalog
    :param eps: Clustering radius in *degrees*
    :param min_members: Minimum number of members to form a cluster, default=1
    :yield: lists of sources, one list per group. No particular order.
    """
    import sklearn
    import sklearn.cluster

    if isinstance(catalog, str):
        table = load_table(catalog)
        srccat = table_to_source_list(table)
    elif isinstance(catalog, list):
        try:
            # probe the first element to check that sources expose ra/dec
            _ = catalog[0].ra, catalog[0].dec
            srccat = catalog
        except AttributeError:
            logging.error(
                "Catalog is either not iterable, or its elements have no ra/dec attributes"
            )
            sys.exit(1)
    else:
        logging.error("I don't know what catalog is")
        sys.exit(1)

    log.info("Regrouping islands within catalog")
    log.debug("Calculating distances")

    srccat = np.array(sorted(srccat, key=lambda x: x.dec))
    X = pairwise_ellpitical_binary(srccat, eps)

    log.debug("Clustering")
    samples, labels = sklearn.cluster.dbscan(X,
                                             eps=0.5,
                                             min_samples=min_members,
                                             metric='precomputed')
    # remove repeats and the noise flag of -1
    unique_labels = set(labels).difference(set([-1]))
    # Return groups of sources
    for label in unique_labels:
        class_member_mask = (labels == label)
        yield srccat[class_member_mask]
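
A toy illustration of the clustering call above. It assumes, as the name pairwise_ellpitical_binary suggests, that the precomputed matrix is binary: 0 where two sources fall within the elliptical cutoff and 1 where they do not, so eps=0.5 joins exactly the zero-distance pairs. The matrix below is invented:

import numpy as np
import sklearn.cluster

# four sources: 0-1 are close, 2-3 are close, the two pairs are far apart
X = np.array([[0, 0, 1, 1],
              [0, 0, 1, 1],
              [1, 1, 0, 0],
              [1, 1, 0, 0]])

core, labels = sklearn.cluster.dbscan(X, eps=0.5, min_samples=1,
                                      metric='precomputed')
print(labels)  # [0 0 1 1] -> two groups of two sources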
Example #4
def group_iter(catalog, eps, min_members=1):
    """
    :param catalog: List of sources, or filename of a catalog
    :param eps: Clustering radius in *degrees*
    :param min_members: Minimum number of members to form a cluster, default=1
    :yield: lists of sources, one list per group. No particular order.
    """
    import sklearn
    import sklearn.cluster

    if isinstance(catalog, str):
        table = load_table(catalog)
        srccat = table_to_source_list(table)
    elif isinstance(catalog, list):
        try:
            # probe the first element to check that sources expose ra/dec
            _ = catalog[0].ra, catalog[0].dec
            srccat = catalog
        except AttributeError:
            logging.error("Catalog is either not iterable, or its elements have no ra/dec attributes")
            sys.exit(1)
    else:
        logging.error("I don't know what catalog is")
        sys.exit(1)

    log.info("Regrouping islands within catalog")
    log.debug("Calculating distances")

    srccat = np.array(sorted(srccat, key=lambda x: x.dec))
    X = pairwise_ellpitical_binary(srccat, eps)

    log.debug("Clustering")
    samples, labels = sklearn.cluster.dbscan(X, eps=0.5, min_samples=min_members, metric='precomputed')
    # remove repeats and the noise flag of -1
    unique_labels = set(labels).difference(set([-1]))
    # Return groups of sources
    for label in unique_labels:
        class_member_mask = (labels == label)
        yield srccat[class_member_mask]
Example #5
def regroup(catalog, eps, far=None):
    """
    Regroup the islands of a catalog according to their normalised distance.
    Return a list of island groups; sources have their (island, source) parameters relabelled.
    :param catalog: A list of sources sorted by declination
    :param eps: Maximum normalised distance within which sources are considered to be grouped
    :param far: (degrees) Sources that are further than this distance apart will not be grouped, and will not be tested
    :return: Groups of sources
    """
    if isinstance(catalog, str):
        table = load_table(catalog)
        srccat = table_to_source_list(table)
    else:
        try:
            srccat = catalog
            _ = catalog[0].ra
            _ = catalog[0].dec

        except AttributeError:
            log.error("catalog is as list of something that has no ra/dec attributes")
            sys.exit(1)

    log.info("Regrouping islands within catalog")
    log.debug("Calculating distances")

    # most negative declination first
    srccat = sorted(srccat, key=lambda x: x.dec)

    if far is None:
        far = 0.5 # 10*max(a.a/3600 for a in srccat)

    groups = {0: [srccat[0]]}
    last_group = 0

    # to parallelize this code, break the list into one part per core
    # compute the groups within each part
    # when the groups are found, check the last/first entry of pairs of groups to see if they need to be joined together
    for s1 in srccat[1:]:
        done = False
        # when an island's largest (last) declination is smaller than decmin,
        # we don't need to look at any earlier islands
        decmin = s1.dec - far
        for g in range(last_group, -1, -1):
            if groups[g][-1].dec < decmin:
                break
            # widen the RA cutoff at high declination
            rafar = far / np.cos(np.radians(s1.dec))
            for s2 in groups[g]:
                if abs(s2.ra - s1.ra) > rafar:
                    continue
                if norm_dist(s1, s2) < eps:
                    groups[g].append(s1)
                    done = True
                    break
            if done:
                break
        if not done:
            last_group += 1
            groups[last_group] = [s1]

    islands = []
    # now that we have the groups, we relabel the sources to have (island,component) in flux order
    # note that the order of sources within an island list is not changed - just their labels
    for isle in groups.keys():
        for comp, src in enumerate(sorted(groups[isle], key=lambda x: -1*x.peak_flux)):
            src.island = isle
            src.source = comp
        islands.append(groups[isle])
    return islands
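
regroup compares sources with norm_dist, which the listing does not show. A plausible stand-in for illustration only (the real AegeanTools definition may differ): the angular separation of two sources divided by their combined angular size, so that eps of order sqrt(2) means "closer together than roughly their own sizes".

import numpy as np

def norm_dist(s1, s2):
    # hypothetical stand-in, not the AegeanTools implementation:
    # flat-sky angular separation divided by the combined source size,
    # with the semi-major axes (a) given in arcsec as per the docstrings
    dra = (s1.ra - s2.ra) * np.cos(np.radians(0.5 * (s1.dec + s2.dec)))
    ddec = s1.dec - s2.dec
    sep = np.hypot(dra, ddec)             # degrees
    size = np.hypot(s1.a, s2.a) / 3600.0  # degrees
    return sep / size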
Example #6
    srccat = np.array(sorted(srccat, key=lambda x: x.dec))
    X = pairwise_ellpitical_binary(srccat, eps)

    log.debug("Clustering")
    samples, labels = sklearn.cluster.dbscan(X, eps=0.5, min_samples=min_members, metric='precomputed')
    # remove repeats and the noise flag of -1
    unique_labels = set(labels).difference(set([-1]))
    # Return groups of sources
    for label in unique_labels:
        class_member_mask = (labels == label)
        yield srccat[class_member_mask]


if __name__ == "__main__":
    logging.basicConfig()
    log = logging.getLogger('Aegean')
    catalog = '1904_comp.vot'
    catalog = 'GLEAM_IDR1.fits'
    table = load_table(catalog)
    positions = np.array(list(zip(table['ra'], table['dec'])))
    srccat = list(table_to_source_list(table))
    # make the catalog stupid big for memory testing.
    # for i in range(5):
    #     srccat.extend(srccat)
    groups = regroup(srccat, eps=np.sqrt(2), far=0.277289506048)
    print "Sources ", len(table)
    print "Groups ", len(groups)
    for g in groups[:50]:
        print len(g),[(a.island,a.source) for a in g]
Example #7
def regroup(catalog, eps, far=None):
    """
    Regroup the islands of a catalog according to their normalised distance.
    Return a list of island groups; sources have their (island, source) parameters relabelled.
    :param catalog: A list of sources sorted by declination
    :param eps: Maximum normalised distance within which sources are considered to be grouped
    :param far: (degrees) Sources that are further than this distance apart will not be grouped, and will not be tested
    :return: Groups of sources
    """
    if isinstance(catalog, str):
        table = load_table(catalog)
        srccat = table_to_source_list(table)
    else:
        try:
            srccat = catalog
            _ = catalog[0].ra
            _ = catalog[0].dec

        except AttributeError:
            log.error(
                "catalog is a list of something that has no ra/dec attributes"
            )
            sys.exit(1)

    log.info("Regrouping islands within catalog")
    log.debug("Calculating distances")

    # most negative declination first
    srccat = sorted(srccat, key=lambda x: x.dec)

    if far is None:
        far = 0.5  # 10*max(a.a/3600 for a in srccat)

    groups = {0: [srccat[0]]}
    last_group = 0

    # to parallelize this code, break the list into one part per core
    # compute the groups within each part
    # when the groups are found, check the last/first entry of pairs of groups to see if they need to be joined together
    for s1 in srccat[1:]:
        done = False
        # when an island's largest (last) declination is smaller than decmin,
        # we don't need to look at any earlier islands
        decmin = s1.dec - far
        for g in range(last_group, -1, -1):
            if groups[g][-1].dec < decmin:
                break
            rafar = far / np.cos(np.radians(s1.dec))
            for s2 in groups[g]:
                if abs(s2.ra - s1.ra) > rafar:
                    continue
                if norm_dist(s1, s2) < eps:
                    groups[g].append(s1)
                    done = True
                    break
            if done:
                break
        if not done:
            last_group += 1
            groups[last_group] = [s1]

    islands = []
    # now that we have the groups, we relabel the sources to have (island,component) in flux order
    # note that the order of sources within an island list is not changed - just their labels
    for isle in groups.keys():
        for comp, src in enumerate(
                sorted(groups[isle], key=lambda x: -1 * x.peak_flux)):
            src.island = isle
            src.source = comp
        islands.append(groups[isle])
    return islands
Example #8
    log.debug("Clustering")
    samples, labels = sklearn.cluster.dbscan(X,
                                             eps=0.5,
                                             min_samples=min_members,
                                             metric='precomputed')
    # remove repeats and the noise flag of -1
    unique_labels = set(labels).difference(set([-1]))
    # Return groups of sources
    for label in unique_labels:
        class_member_mask = (labels == label)
        yield srccat[class_member_mask]


if __name__ == "__main__":
    logging.basicConfig()
    log = logging.getLogger('Aegean')
    catalog = '1904_comp.vot'
    catalog = 'GLEAM_IDR1.fits'
    table = load_table(catalog)
    positions = np.array(list(zip(table['ra'], table['dec'])))
    srccat = list(table_to_source_list(table))
    # make the catalog stupid big for memory testing.
    # for i in range(5):
    #     srccat.extend(srccat)
    groups = regroup(srccat, eps=np.sqrt(2), far=0.277289506048)
    print "Sources ", len(table)
    print "Groups ", len(groups)
    for g in groups[:50]:
        print len(g), [(a.island, a.source) for a in g]
Example #9
def regroup(catalog, eps, number, far=None, dist=None, partial=None):
    """
    Regroup the islands of a catalog according to their normalised distance.
    Return a list of island groups. Sources have their (island,source) parameters relabeled.


    Parameters
    ----------
    catalog : str or object
        Either a filename to read into a source list, or a list of objects with the following properties[units]:
        ra[deg], dec[deg], a[arcsec], b[arcsec], pa[deg], peak_flux[any]

    eps : float
        Maximum normalised distance within which sources are considered to be grouped.

    number : int
        Number of input epochs/catalogues. Used to decide how many times each epoch
        may contribute to a single group.

    far : float
        (degrees) Sources that are further than this distance apart will not be grouped, and will not be tested.
        Default = None.

    dist : func
        A function that calculates the distance between two sources; must accept two SimpleSource objects.
        Default = :func:`AegeanTools.cluster.norm_dist`

    partial : object
        If not None, the duplicate-epoch check is applied to every group, not only to
        those whose size is a multiple of `number`. Default = None.

    Returns
    -------
    results : dict
        A dict with two entries: 'islands', a list of islands (each a list of sources),
        and 'bug_counter', the number of groups that were split into single-source
        islands because one epoch appeared in them too many times.

    See Also
    --------
    :func:`AegeanTools.cluster.norm_dist`
    """

    if isinstance(catalog, str):
        table = load_table(catalog)
        srccat = table_to_source_list(table)
    else:
        try:
            srccat = catalog
            # probe the first element to check that sources expose the required attributes
            _ = (catalog[0].ra, catalog[0].err_ra,
                 catalog[0].dec, catalog[0].err_dec,
                 catalog[0].int_flux, catalog[0].err_int_flux)

        except AttributeError as e:
            log.error("catalog is not understood.")
            log.error(
                "catalog: Should be a list of objects with the following properties[units]:\n"
                "ra[deg], dec[deg], a[arcsec], b[arcsec], pa[deg], peak_flux[any]"
            )
            raise e

    log.info("Regrouping islands within catalog")
    log.debug("Calculating distances")

    # most negative declination first
    srccat = sorted(srccat, key=lambda x: x.dec)

    if far is None:
        far = 0.5  # 10*max(a.a/3600 for a in srccat)

    if dist is None:
        dist = norm_dist  # default documented above

    groups = {0: [srccat[0]]}
    last_group = 0

    # to parallelize this code, break the list into one part per core
    # compute the groups within each part
    # when the groups are found, check the last/first entry of pairs of groups to see if they need to be joined together
    for s1 in srccat[1:]:
        done = False
        # when an island's largest (last) declination is smaller than decmin,
        # we don't need to look at any earlier islands
        decmin = s1.dec - far
        for g in range(last_group, -1, -1):
            if groups[g][-1].dec < decmin:
                break
            rafar = far / np.cos(np.radians(s1.dec))
            for s2 in groups[g]:
                if abs(s2.ra - s1.ra) > rafar:
                    continue
                if dist(s1, s2) < eps:
                    groups[g].append(s1)
                    done = True
                    break
            if done:
                break
        if not done:
            last_group += 1
            groups[last_group] = [s1]

    islands = []
    bug_counter = 0

    # Check that no epoch appears more than its allowed number of times in any
    # one group; groups that fail are broken up into single-source islands.
    for isle in groups.keys():
        bad = False
        if len(groups[isle]) % number == 0 or partial is not None:
            max_allowable = int(math.ceil(len(groups[isle]) / number))
            counter_list = [0] * number  # per-epoch occurrence counts
            for k in range(len(groups[isle])):
                if counter_list[groups[isle][k].island] == max_allowable:
                    bad = True
                    bug_counter += 1
                    for h in range(len(groups[isle])):
                        islands.append([groups[isle][h]])
                    break
                else:
                    counter_list[groups[isle][k].island] += 1
        if not bad:
            islands.append(groups[isle])

    return {'islands': islands, 'bug_counter': bug_counter}
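
A toy run of the duplicate-epoch check above, using a hypothetical bare-bones source with only the attribute the check touches (island holds the epoch label assigned by the retrieve_data_* helpers):

import math
from types import SimpleNamespace

number = 2  # two epochs were combined
# a group where epoch 0 appears three times but epoch 1 only once
group = [SimpleNamespace(island=e) for e in (0, 0, 0, 1)]

max_allowable = int(math.ceil(len(group) / number))  # 2
counts = [0] * number
bad = False
for src in group:
    if counts[src.island] == max_allowable:
        bad = True  # epoch 0 exceeded its share, so the group would be split
        break
    counts[src.island] += 1
print(bad)  # True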