def testChunkFileListsRW(self):
    """Round-trip chunk logs through files under /tmp and compare the results.

    A target chunk-list file is written from the fixture's raw data, a
    ChunkLogs output set is created from it and populated, and a second
    ChunkLogs is then built from the file names returned by
    ``ChunkLogs.createNames``. The chunk sets of the written and the
    re-read logs must match, and the re-read result set must equal the
    fixture's expected result.
    """
    fixture = TData()
    target_path = "/tmp/tmpchunktest"

    # Write the target chunk list to disk from the raw fixture data.
    target_file = chunklogs.ChunkListFile(target_path)
    target_file.parse(fixture.good_raw)
    target_file.write()

    # Build a ChunkLogs on top of the file that was just written.
    source_logs = chunklogs.ChunkLogs(target_path)
    source_logs.build(fixture.valid_ids)

    # Create the output logs, write them, then record the fixture entries.
    written = source_logs.createOutput("/tmp/")
    written.write()
    written.addCompleted(fixture.completed)
    written.addLimbo(fixture.limbo)
    written.addAssigned(fixture.assigned)

    # Read everything back using the names ChunkLogs itself generates.
    names = chunklogs.ChunkLogs.createNames("/tmp")
    target_name, completed_name, assigned_name, limbo_name = names
    reread = chunklogs.ChunkLogs(target_name, completed_name, assigned_name,
                                 limbo_name)
    reread.build(fixture.valid_ids)

    # Each per-state chunk list must be identical after the round trip.
    for list_attr in ("_target", "_completed", "_assigned", "_limbo"):
        self.assertSetEqual(getattr(written, list_attr).chunk_set,
                            getattr(reread, list_attr).chunk_set)

    self.assertSetEqual(reread.result_set, set(fixture.result_expected))
Esempio n. 2
0
def server():
    """Parse the command line, prepare the config file and start the server.

    Options are read from ``sys.argv[1:]``:

    * ``-h`` / ``--help``: call ``usage()`` and return ``False``.
    * ``-a`` / ``--authIngest VALUE``: replaces ``#INGEST_AUTH#`` in the config.
    * ``-g`` / ``--ingestHost VALUE``: replaces ``#INGEST_HOST#`` in the config
      (default ``127.0.0.1``).
    * ``-c`` / ``--configfile VALUE``: file name under ``<cwd>/localConfig``
      (default ``serverCfg.yml``).
    * ``-i`` / ``--inDir VALUE``: directory handed to
      ``ChunkLogs.checkFiles``; an empty string is a valid value.
    * ``-o`` / ``--outDir VALUE``, ``-r`` / ``--raw VALUE``: passed through.
    * ``-k`` / ``--skipIngest``, ``-s`` / ``--skipSchema``,
      ``-z`` / ``--keepCsv``: boolean flags passed to ``DataGenServer``.

    The config file is rewritten in place with the placeholders substituted.

    Returns:
        ``False`` when help was requested; otherwise the function returns
        ``None`` after ``DataGenServer.start()`` returns.

    Raises:
        SystemExit: status 1 on an option-parsing error or when the config
            file does not exist; status 0 when there are no chunks to send.
    """
    argumentList = sys.argv[1:]
    print("argumentList=", argumentList)
    options = "ha:ksc:g:i:o:r:z"
    # getopt requires a trailing "=" on every long option that takes a value;
    # without it "--name=value" is rejected and "--name value" drops the value.
    # "ingestHost=" is listed so that the --ingestHost branch below is reachable.
    long_options = [
        "help", "authIngest=", "skipIngest", "skipSchema", "configfile=",
        "ingestHost=", "outDir=", "inDir=", "raw=", "keepCsv"
    ]
    auth_ingest = ""
    skip_ingest = False
    skip_schema = False
    config_file = "serverCfg.yml"
    ingest_host = "127.0.0.1"
    in_dir = None
    out_dir = ""
    raw = None
    keep_csv = False
    try:
        arguments, values = getopt.getopt(argumentList, options, long_options)
        print("arguments=", arguments)
        for arg, val in arguments:
            if arg in ("-h", "--help"):
                usage()
                return False
            elif arg in ("-a", "--authIngest"):
                auth_ingest = val
            elif arg in ("-k", "--skipIngest"):
                skip_ingest = True
            elif arg in ("-s", "--skipSchema"):
                skip_schema = True
            elif arg in ("-c", "--configfile"):
                config_file = val
            elif arg in ("-g", "--ingestHost"):
                ingest_host = val
            elif arg in ("-o", "--outDir"):
                out_dir = val
            elif arg in ("-i", "--inDir"):
                in_dir = val
            elif arg in ("-r", "--raw"):
                raw = val
            elif arg in ("-z", "--keepCsv"):
                keep_csv = True
    except getopt.error as err:
        print(str(err))
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive sessions and is absent when Python runs with -S.
        sys.exit(1)
    print("skip_ingest=", skip_ingest, "skip_schema=", skip_schema, "values=",
          values)
    print(f"configfile={config_file} in_dir={in_dir} raw={raw}\n")

    # Check that configFile exists and make it the absolute path
    abs_path_cwd = Path.cwd()
    config_file_path = abs_path_cwd / "localConfig" / config_file
    if not config_file_path.is_file():
        print(
            f"ERROR: config_file {config_file} -> {config_file_path} is not a file, exiting"
        )
        sys.exit(1)

    print("config_file_path", config_file_path)
    # Replace #INGEST_HOST# with ingest_host in the file
    # Replace #INGEST_AUTH# with auth_ingest in the file
    with open(config_file_path, 'r') as cfg_file:
        cfg_contents_in = cfg_file.read()
    # Build the full replacement text before reopening the file for writing,
    # so the file is only truncated once the new contents are ready.
    cfg_out = cfg_contents_in.replace('#INGEST_HOST#', ingest_host)
    cfg_out = cfg_out.replace('#INGEST_AUTH#', auth_ingest)
    with open(config_file_path, 'w') as cfg_file:
        cfg_file.write(cfg_out)

    # If in_dir is defined (empty string is valid), see if files can be found
    if in_dir is not None:
        # Throws if targetf not found
        targetf, completedf, assignedf, limbof = chunklogs.ChunkLogs.checkFiles(
            in_dir)
        print(
            f"target={targetf} completed={completedf} assigned={assignedf} limbo={limbof}"
        )
        clfs = chunklogs.ChunkLogs(targetf, completedf, assignedf, limbof, raw)
    else:
        clfs = chunklogs.ChunkLogs(None, raw=raw)

    dgServ = DataGenServer(config_file_path, clfs, out_dir, skip_ingest,
                           skip_schema, keep_csv)
    if dgServ.chunksToSendTotal() == 0:
        print("No chunks to generate, exiting.")
        sys.exit(0)
    dgServ.start()
Esempio n. 3
0
if __name__ == "__main__":
    # Script entry point: locate the chunk log files in the directory given
    # with -i/--inDir (default: empty string), build a ChunkLogs from them
    # and print its report.
    argument_list = sys.argv[1:]
    print("argumentList=", argument_list)
    options = "hi:"
    # The trailing "=" tells getopt that --inDir takes a value, matching the
    # "i:" short form; without it "--inDir=path" is rejected as an error.
    long_options = ["help", "inDir="]
    in_dir = ""
    try:
        arguments, values = getopt.getopt(argument_list, options, long_options)
        print("arguments=", arguments)
        for arg, val in arguments:
            if arg in ("-h", "--help"):
                usage()
                sys.exit(1)
            elif arg in ("-i", "--inDir"):
                in_dir = val
    except getopt.error as err:
        print(str(err))
        # usage() is called bare in the --help branch above, so it is
        # presumably self-printing; wrapping it in print() would also emit
        # its return value.
        usage()
        sys.exit(1)
    print(f"in_dir={in_dir}\n")
    # If in_dir is defined (empty string is valid), see if files can be found
    # Throws if targetf not found
    targetf, completedf, assignedf, limbof = chunklogs.ChunkLogs.checkFiles(
        in_dir)
    print(
        f"target={targetf}\ncompleted={completedf}\nassigned={assignedf}\nlimbo={limbof}\n"
    )
    clogs = chunklogs.ChunkLogs(targetf, completedf, assignedf, limbof, None)
    clogs.build(None)
    print(clogs.report())
 def testChunkFileLists(self):
     """Build ChunkLogs from the fixture's raw lists and check the result set.

     The result set produced by ``build`` must equal the fixture's
     expected list; the report is printed afterwards.
     """
     fixture = TData()
     logs = chunklogs.ChunkLogs(None, raw=fixture.lists_raw)
     logs.build(fixture.valid_ids)

     actual = logs.result_set
     expected = set(fixture.lists_expected)
     self.assertSetEqual(actual, expected)

     report_text = logs.report()
     print(report_text)
    def test_chunk_trakcing(self):
        """Drive ChunkTracking through a full hand-out/report cycle.

        Chunks are requested for a client and reported back as completed,
        repeatedly, until a request returns no chunks. Along the way the
        test checks that chunks handed out are removed from the current
        transaction's pending set, that the to-send set and the union of all
        transactions' chunks stay disjoint and together cover the entire
        set, and that at the end the transaction is closed, nothing is left
        to send and every chunk is recorded as completed.
        """
        with tempfile.TemporaryDirectory() as log_dir:
            clfs = chunklogs.ChunkLogs(None, raw='0:1000')
            db_name = 'junk_db'
            # ingest will not be contacted in unit tests
            skip_ingest = True
            skip_schema = True
            keep_csv = True
            ingest_dict = {
                'host': '127.0.0.1',
                'port': 25080,
                'auth': '',
                'db': db_name,
                'skip': skip_ingest,
                'keep': keep_csv
            }
            # local_chunker and valid_chunks are not defined in this method;
            # they are resolved from the enclosing scope.
            c_t = chunktracking.ChunkTracking(local_chunker, clfs, 100,
                                              skip_ingest, skip_schema,
                                              log_dir, ingest_dict)
            self.assertSetEqual(c_t._chunks_to_send_set, valid_chunks)

            client_chunks, transaction_id = c_t.get_chunks_for_client(
                7, "some.pc.edu", 5)
            print(f"t_id={transaction_id} client_chunks={client_chunks}")
            print(f" {c_t._transaction}")

            self.assertEqual(c_t._transaction.id, transaction_id)
            self.assertEqual(c_t._transaction,
                             c_t._transaction_dict[transaction_id])
            self.assertSetEqual(c_t._transaction.total_chunks,
                                c_t._transaction.chunks.union(client_chunks))
            self.assertNotEqual(c_t._transaction.total_chunks,
                                c_t._transaction.chunks)
            self.assertTrue(c_t._transaction.chunks.isdisjoint(client_chunks))

            # Pretend that the chunks were sent to the client and the client created all of them
            completed_chunks = client_chunks.copy()
            c_t.client_results(transaction_id, client_chunks, completed_chunks)
            self.assertFalse(c_t._transaction.closed)
            self.assertFalse(c_t._transaction.abort)

            # Loop until a request returns no chunks. The body always runs at
            # least once, and the empty result is still reported back before
            # the loop ends.
            while True:
                print("loop start")
                cl_chunks, t_id = c_t.get_chunks_for_client(
                    12, "some.pc.edu", 5)
                self.assertTrue(c_t._transaction.chunks.isdisjoint(cl_chunks))
                self.assertFalse(c_t._transaction.closed)
                self.assertFalse(c_t._transaction.abort)

                # Check that Transactions contain appropriate sets.
                # Iterate over values only: binding the dict key to t_id here
                # would overwrite the transaction id returned above, which is
                # still needed for client_results() below.
                chunks_in_all_transactions = set()
                for t_val in c_t._transaction_dict.values():
                    chunks_in_all_transactions |= t_val.total_chunks
                self.assertTrue(
                    c_t._chunks_to_send_set.isdisjoint(
                        chunks_in_all_transactions))
                union_to_send_all_trans = c_t._chunks_to_send_set | chunks_in_all_transactions
                self.assertSetEqual(c_t._chunks_entire_set,
                                    union_to_send_all_trans)

                # Pretend that the chunks were sent to the client and the client created all of them
                completed_chunks = cl_chunks.copy()
                c_t.client_results(t_id, cl_chunks, completed_chunks)

                if not cl_chunks:
                    break

            print(f"c_t._transaction={c_t._transaction}")

            self.assertTrue(c_t._transaction.closed)
            self.assertFalse(c_t._transaction.abort)
            self.assertEqual(len(c_t._chunks_to_send_set), 0)
            self.assertSetEqual(c_t._chunks_entire_set,
                                c_t._chunk_logs._completed.chunk_set)