def data_from_coordinate_pair(save_dir, x, y):

    # load the global tree, find the zone_id to which this star belongs
    global_tree_file = save_dir + "/global.json"
    global_tree = QuadTreeNode.from_file(global_tree_file, leafClass=IDLeaf)
    leaf = global_tree.find_leaf(x, y)
    zone_id = leaf.node_id

    # load the zone, extract the star's data
    zone_name = apass.name_zone(zone_id)

    # load the zone's tree and data from disk and get the leaves
    zone_json = save_dir + apass.name_zone_json_file(zone_id)
    zone_tree = QuadTreeNode.from_file(zone_json, leafClass=RectLeaf)
    zone.load_zone_data(zone_tree, save_dir)

    # find the leaf containing the point of interest
    leaf = zone_tree.find_leaf(x, y)

    # export the data
    data = None
    for container in leaf.containers:
        if container.contains(x, y):
            data = fred.to_fredbin(container.data)

    return data
def apass_zone_to_dat(save_dir, filter_config, zone_container_filename):
    """Process all of the rectangles found in the zone. With APASS data, we
    average on a per-container basis."""

    averages = []

    zone_id = apass.zone_from_name(zone_container_filename)
    zone_name = apass.name_zone(zone_id)
    print "Processing zone " + zone_name

    # load the zone's tree and data from disk and get the leaves
    zone_json = save_dir + apass.name_zone_json_file(zone_id)
    zone_tree = QuadTreeNode.from_file(zone_json, leafClass=RectLeaf)
    zone.load_zone_data(zone_tree, save_dir)
    leaves = zone_tree.get_leaves()

    # average the data in each container
    for leaf in leaves:
        for container in leaf.containers:
            c_ave = average_container(container, filter_config)
            averages.append(c_ave)

    # write out the average information
    dat_filename = save_dir + "/" + zone_name + ".dat"
    averages = dat.dicts_to_ndarray(averages, dat_type="apass")
    dat.write_dat(dat_filename, averages, dat_type="apass")
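# For reference, `average_container` is defined elsewhere in this repository and
# is not shown in this excerpt. The sketch below is a hypothetical, minimal
# illustration of per-container averaging, assuming a container's measurements
# are available as dicts with 'filter_id' and 'magnitude' keys; the real record
# layout and helper in the pipeline may differ.
def _example_average_container(records):
    """Hypothetical sketch: average magnitudes grouped by filter."""
    from collections import defaultdict

    sums = defaultdict(float)
    counts = defaultdict(int)
    for rec in records:
        sums[rec['filter_id']] += rec['magnitude']
        counts[rec['filter_id']] += 1

    # one averaged magnitude per filter
    return {f: sums[f] / counts[f] for f in sums}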
def data_from_unique_id(save_dir, zone_id, node_id, container_id):
    """Looks up the photometric data for the container identified by the
    (zone_id, node_id, container_id) triplet."""

    # read the containerized data for this zone
    zone_filename = save_dir + apass.name_zone(zone_id) + "-container.fredbin"
    zone_data = fred.read_fredbin(zone_filename)

    # select the rows matching the requested node and container
    indices = np.where((zone_data['node_id'] == node_id) &
                       (zone_data['container_id'] == container_id))
    data = zone_data[indices]

    return data
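# A minimal usage sketch for the two lookup helpers above. The save directory
# and the numeric values are placeholders, not values from a real pipeline run,
# and the (x, y) pair is assumed to correspond to (RA, DEC).
def _example_lookups():
    save_dir = "/path/to/save/"  # placeholder

    # look up a star by its coordinate pair
    star_data = data_from_coordinate_pair(save_dir, 210.5, -3.2)

    # look up a container directly by its (zone, node, container) triplet
    container_data = data_from_unique_id(save_dir, zone_id=1234,
                                         node_id=5, container_id=7)

    return star_data, container_data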
def main():
    parser = argparse.ArgumentParser(
        description='Finds the zone(s) containing the specified coordinates')
    parser.add_argument('save_dir',
                        help="Directory where save files can be found")
    parser.add_argument(
        'coords', nargs='+',
        help="Coordinates as either an (x, y) pair or an "
             "(x_min, y_min, x_max, y_max) quad")

    # parse the command line arguments
    args = parser.parse_args()

    coords = map(float, args.coords)
    num_coords = len(coords)
    if not (num_coords == 2 or num_coords == 4):
        print("Coordinates must either be a pair or a quad. See -h")
        quit()

    save_dir = os.path.abspath(args.save_dir) + "/"
    tree_file = save_dir + "/global.json"

    # load the zone quadtree
    tree = QuadTreeNode.from_file(tree_file, leafClass=IDLeaf)

    # execute the search
    zone_ids = []
    if num_coords == 2:
        # simple (x, y) pair, simply find the leaf
        x, y = coords
        leaf = tree.find_leaf(x, y)
        zone_ids.append(leaf.node_id)
    elif num_coords == 4:
        print("Not implemented")

    for zone_id in zone_ids:
        print(name_zone(zone_id))
def sro_zone_to_dat(save_dir, filter_config, zone_container_filename):
    """Processes all of the rectangles found in the zone. The zone should be a
    valid subdirectory of save_dir."""

    zone_id = apass.zone_from_name(zone_container_filename)
    zone_name = apass.name_zone(zone_id)
    print "Processing zone " + zone_name

    # create a graph data structure for this zone
    G = nx.Graph()

    # load the zone's tree and data from disk and get the leaves
    zone_json = save_dir + apass.name_zone_json_file(zone_id)
    zone_tree = QuadTreeNode.from_file(zone_json, leafClass=RectLeaf)
    zone.load_zone_data(zone_tree, save_dir)
    leaves = zone_tree.get_leaves()

    # average the data and populate the graph with shared container information
    line_number = 0
    averages = []
    for leaf in leaves:
        for container in leaf.containers:
            # average the data
            c_aves = average_by_field(container, filter_config)
            averages.extend(c_aves)

            # populate overlapping line information
            line_numbers = []
            field_ids = []
            for c_ave in c_aves:
                line_numbers.append(line_number)
                field_ids.append(c_ave['field_id'])
                line_number += 1

            # if more than one field ID is present, populate information
            # in the graph
            if len(field_ids) > 1:
                # generate all possible combinations of the line numbers and
                # field IDs; these express the edges in the graph
                line_pairs = list(itertools.combinations(line_numbers, 2))
                field_pairs = list(itertools.combinations(field_ids, 2))

                for line_pair, field_pair in zip(line_pairs, field_pairs):
                    src_line = line_pair[0]
                    dst_line = line_pair[1]
                    src_field = field_pair[0]
                    dst_field = field_pair[1]

                    try:
                        edge = G[src_field][dst_field]
                    except KeyError:
                        G.add_edge(src_field, dst_field, line_ids=[], weight=0)
                        edge = G[src_field][dst_field]

                    edge['line_ids'].append((src_line, dst_line))
                    edge['weight'] += 1

    # write out the average information
    dat_filename = save_dir + "/" + zone_name + ".dat"
    averages = dat.dicts_to_ndarray(averages, dat_type="sro")
    dat.write_dat(dat_filename, averages, dat_type="sro")

    # save the graph to a pickle file
    graph_filename = save_dir + "/" + zone_name + ".p"
    nx.write_gpickle(G, graph_filename)
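# The edge bookkeeping above pairs every combination of averaged rows that came
# from the same container but belong to different fields. The standalone sketch
# below reproduces that pattern on made-up line numbers and field IDs to show
# how the 'line_ids' and 'weight' attributes accumulate on each edge; the
# values are arbitrary and purely illustrative.
def _example_overlap_graph():
    import itertools
    import networkx as nx

    G = nx.Graph()

    # pretend one container produced three averaged rows from three fields
    line_numbers = [10, 11, 12]
    field_ids = [100, 200, 300]

    for (src_line, dst_line), (src_field, dst_field) in zip(
            itertools.combinations(line_numbers, 2),
            itertools.combinations(field_ids, 2)):
        if not G.has_edge(src_field, dst_field):
            G.add_edge(src_field, dst_field, line_ids=[], weight=0)

        edge = G[src_field][dst_field]
        edge['line_ids'].append((src_line, dst_line))
        edge['weight'] += 1

    return G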
def main():
    """Plots all data in an APASS zone and optionally displays container
    boundaries."""

    parser = argparse.ArgumentParser(
        description='Plots a zone file and its data')
    parser.add_argument('input', nargs='+',
                        help="Zone JSON files to be plotted")
    parser.add_argument('--show-containers', default=False,
                        action="store_true",
                        help="Plot borders for containers")

    args = parser.parse_args()
    save_dir = os.path.dirname(os.path.realpath(args.input[0])) + "/"
    inputs = args.input

    for filename in inputs:
        zone_id = apass.zone_from_name(filename)
        zone_name = apass.name_zone(zone_id)
        print "Plotting zone " + zone_name

        # load the original zone data. Note, we don't restore it to the tree
        zone_data_file = save_dir + apass.name_zone_file(zone_id)
        zone_data = fred.read_fredbin(zone_data_file)
        print("Zone file has " + str(zone_data.size) + " entries")

        # load the containerized zone data
        zone_container_file = save_dir + apass.name_zone_container_file(zone_id)
        zone_container_data = fred.read_fredbin(zone_container_file)
        print("Zone container file has " + str(zone_container_data.size) +
              " entries")

        # load the zone's tree
        zone_json = save_dir + apass.name_zone_json_file(zone_id)
        zone_tree = QuadTreeNode.from_file(zone_json, leafClass=RectLeaf)
        leaves = zone_tree.get_leaves()

        total_containers = 0
        for leaf in leaves:
            total_containers += len(leaf.containers)
        print("Zone contains a total of " + str(total_containers) +
              " containers")

        fig, axes = plt.subplots(1)

        # plot the leaf boundaries and, optionally, the container boundaries
        for leaf in leaves:
            rect = leaf.rect
            x = rect.x_min
            y = rect.y_min
            dx = rect.x_max - x
            dy = rect.y_max - y
            axes.add_patch(patches.Rectangle((x, y), dx, dy, fill=False))

            if args.show_containers:
                plot_containers(leaf, axes)

        # plot the data
        dec = zone_data['dec']
        ra = zone_data['ra']
        plt.scatter(ra, dec)

        dec = zone_container_data['dec']
        ra = zone_container_data['ra']
        plt.scatter(ra, dec, color="green")

        plt.show()
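# `plot_containers` is called above but defined elsewhere in this repository.
# As a rough, hypothetical sketch, it presumably draws one rectangle patch per
# container in the leaf, along the lines of the following; the container's
# bounding-box attribute (`container.rect`) is an assumption, not a confirmed
# part of the RectLeaf/container API.
def _example_plot_containers(leaf, axes):
    for container in leaf.containers:
        rect = container.rect
        x = rect.x_min
        y = rect.y_min
        dx = rect.x_max - x
        dy = rect.y_max - y
        axes.add_patch(patches.Rectangle((x, y), dx, dy,
                                         fill=False, edgecolor="red"))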
def main():
    parser = argparse.ArgumentParser(
        description='Identifies the state of the pipeline for each zone')
    parser.add_argument('save_dir',
                        help="Directory where save files can be found")

    # parse the command line arguments
    args = parser.parse_args()
    save_dir = os.path.abspath(args.save_dir) + "/"
    tree_file = save_dir + "/global.json"
    output_file = save_dir + "/broken_zones.log"

    # get a list of all of the files in the save directory
    saved_files = glob.glob(save_dir + "z*")

    # load the zone quadtree
    tree = QuadTreeNode.from_file(tree_file, leafClass=IDLeaf)

    # init a structure to store stage -> zone mapping information
    stages = dict()
    for extension in expected_extensions:
        stages[extension] = []

    # find the broken zones
    poles_checked = [False, False]
    leaves = tree.get_leaves()
    for leaf in leaves:
        zone_id = leaf.node_id
        zone_name = name_zone(zone_id)

        # The north/south zone IDs appear multiple times in the tree.
        # Ensure we only visit these once.
        if zone_id == south_zone_id or zone_id == north_zone_id:
            if poles_checked[zone_id]:
                continue
            else:
                poles_checked[zone_id] = True

        # determine which, if any, files are missing for this zone
        missing_extensions = []
        for extension in expected_extensions:
            filename = save_dir + zone_name + extension
            if filename in saved_files:
                saved_files.remove(filename)
            else:
                missing_extensions.append(extension)
                stages[extension].append(filename)

        if len(missing_extensions) > 0:
            # print a message on the console
            print str(zone_id) + ": " + ", ".join(missing_extensions)

    # report the earliest pipeline stage with missing files and write them out
    missing_file = save_dir + '/missing.txt'
    for extension in expected_extensions:
        missing_files = stages[extension]
        if len(missing_files) > 0:
            print("The earliest missing files are the '%s' files." % (extension))
            print("Modify the 'missing.txt' file to list the previous stage's "
                  "input files and run the corresponding stage of the pipeline")

            with open(missing_file, 'w') as outfile:
                for filename in missing_files:
                    outfile.write(filename + "\n")

            break