import os

import util
import fbs_api


def process_obs_to_html(dataset_file):
    """
    Looks up each Observation in the MOLES catalogue, matches phenomena to it
    from ES and then writes HTML pages listing them.
    """
    lines = util.read_file_into_list(dataset_file)
    summary_info = []

    for line in lines:
        # Each line has the form "<name>=<path>".
        path = line.split("=")[1].rstrip()
        try:
            print("searching path {}".format(path))
            results = fbs_api.get_dir_info(path)
        except Exception:
            # Skip paths the API cannot resolve.
            continue

        record = (line, results["formats"], results["sample_names"])
        summary_info.append(record)

    print(create_html_table(summary_info))
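
# create_html_table is not defined in this listing. The following is a minimal
# sketch of what it might look like, assuming each record is the
# (line, formats, sample_names) tuple built above; it is an illustration, not
# the project's actual implementation.
def create_html_table(summary_info):
    header = "<tr><th>Dataset</th><th>Formats</th><th>Sample files</th></tr>"
    rows = []
    for line, formats, sample_names in summary_info:
        rows.append("<tr><td>{}</td><td>{}</td><td>{}</td></tr>".format(
            line.rstrip(), ", ".join(formats), ", ".join(sample_names)))
    return "<table>\n{}\n{}\n</table>".format(header, "\n".join(rows))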
def parse_logs(com_args):

    log_directory = com_args["log_directory"]
    datasets_file = com_args["filename"]

    # Find all files in the log directory.
    list_of_files = util.build_file_list(log_directory)

    summary_info = {}

    # Open each file and extract its summary information.
    for filename in list_of_files:
        content_list = util.read_file_into_list(filename)
        summary = util.find_in_list(content_list, "Summary")

        if summary is None:
            continue

        # The field positions below rely on a fixed wording of the
        # comma-separated "Summary" line.
        words_list = summary.split("Summary", 1)[1].split(",")
        dataset = words_list[0].split()[5]
        indexed = int(words_list[1].split()[3])
        database_errors = int(words_list[2].split()[3])
        properties_errors = int(words_list[3].split()[3])
        total_files = int(words_list[4].split()[3])

        if dataset not in summary_info:
            summary_info[dataset] = {
                "indexed": indexed,
                "database_errors": database_errors,
                "properties_errors": properties_errors,
                "total_files": total_files,
                "dataset_dir": util.find_dataset(datasets_file, dataset),
            }
        else:
            # Accumulate counts across log files; "total_files" keeps the
            # value from the first log seen for the dataset.
            dataset_info = summary_info[dataset]
            dataset_info["indexed"] += indexed
            dataset_info["database_errors"] += database_errors
            dataset_info["properties_errors"] += properties_errors

    return summary_info
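
# The util helpers used throughout are project-internal. Below are plausible
# implementations, inferred from how they are called in this listing; the
# bodies are assumptions, not the project's actual code.

def read_file_into_list(path):
    # Return the file's lines with newlines preserved, since callers write
    # items back out verbatim.
    with open(path) as f:
        return f.readlines()


def build_file_list(directory):
    # Recursively collect the paths of all files under the given directory.
    paths = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            paths.append(os.path.join(root, name))
    return paths


def find_in_list(lines, text):
    # Return the first line containing the given text, or None if absent.
    for line in lines:
        if text in line:
            return line
    return None


def find_dataset(datasets_file, dataset):
    # Look up a dataset's directory in a file of "name=path" lines, the same
    # format that process_obs_to_html parses above.
    for line in read_file_into_list(datasets_file):
        name, sep, path = line.partition("=")
        if sep and name.strip() == dataset:
            return path.strip()
    return None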
def sample_files(in_path, out_path):

    # Go to the directory and create the file list.
    list_of_cache_files = util.build_file_list(in_path)

    for filename in list_of_cache_files:
        contents = util.read_file_into_list(filename)
        new_file_name = os.path.join(out_path, os.path.basename(filename) + "-sample")

        # Copy at most 1000 ".pp" entries from each cache file into its
        # "-sample" counterpart.
        counter = 0
        with open(new_file_name, "w") as fd:
            for item in contents:
                if item.rstrip().endswith(".pp"):
                    fd.write(item)
                    counter += 1
                    if counter >= 1000:
                        break
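
# A hypothetical driver tying the pieces together. The "log_directory" and
# "filename" keys are the ones parse_logs reads; the path values here are
# placeholders for illustration.
if __name__ == "__main__":
    com_args = {
        "log_directory": "/path/to/logs",      # directory of indexing logs
        "filename": "/path/to/datasets.txt",   # "name=path" dataset list
    }
    for dataset, info in parse_logs(com_args).items():
        print("{}: indexed {}/{} files ({} database errors, {} properties errors)".format(
            dataset, info["indexed"], info["total_files"],
            info["database_errors"], info["properties_errors"]))

    sample_files("/path/to/cache_files", "/path/to/samples")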