Exemple #1
0
def mmultfile_ata_piece(a_filename,
                        offset,
                        work_index=0,
                        work_count=1,
                        log_frequency=-1,
                        force_python_only=False):
    """Compute one column-batch of ``A.T * A`` for a matrix stored on disk.

    The matrix ``A`` is opened with ``SnpMemMap(a_filename)``. Its
    ``sid_count`` columns are divided into ``work_count`` contiguous batches;
    batch ``k`` covers columns ``sid_count * k // work_count`` up to (but not
    including) ``sid_count * (k + 1) // work_count``.

    :param a_filename: Path handed to ``SnpMemMap`` (and to
        ``mmultfile_atax`` in the non-Python path).
    :param offset: Accepted for interface compatibility; this function does
        not read it and uses ``a.offset`` from the opened memmap instead.
    :param work_index: Index of the batch to compute.
    :param work_count: Total number of batches.
    :param log_frequency: When greater than zero, progress is logged; in the
        pure-Python path a line is logged for every batch index that is a
        multiple of this value. Zero or negative disables logging.
    :param force_python_only: When true, do the work with NumPy instead of
        calling ``mmultfile_atax``.
    :return: A C-ordered float array of shape
        ``(a.sid_count - start, stop - start)`` where ``start``/``stop``
        delimit batch ``work_index``.
    """
    t0_gtg = time.time()
    if log_frequency > 0:
        logging.info("ata_piece: Working on piece {0} of {1}.".format(
            work_index, work_count))

    a = SnpMemMap(a_filename)

    def debatch_closure(work_index2):
        # First column of batch ``work_index2``.
        return a.sid_count * work_index2 // work_count

    start = debatch_closure(work_index)
    stop = debatch_closure(work_index + 1)

    ata_piece = np.zeros((a.sid_count - start, stop - start), order='C')

    if force_python_only:
        with open(a.filename, "rb") as fp:
            # Data is read as 8-byte floats, one full column (iid_count
            # values) after another, hence the Fortran-order reshape.
            fp.seek(a.offset + start * a.iid_count * 8)
            base_slice = np.fromfile(
                fp, dtype=np.float64,
                count=a.iid_count * (stop - start)).reshape(
                    a.iid_count, stop - start, order="F")
            # ``range`` (not ``xrange``) so this runs on Python 2 and 3.
            for i in range(work_index, work_count):
                starti = debatch_closure(i)
                stopi = debatch_closure(i + 1)
                if i > work_index:
                    # The file position already sits at the start of batch
                    # ``i`` because batches are read strictly in sequence.
                    slicei = np.fromfile(
                        fp, dtype=np.float64,
                        count=a.iid_count * (stopi - starti)).reshape(
                            a.iid_count, stopi - starti, order="F")
                else:
                    slicei = base_slice
                # Test the guard first so log_frequency == 0 cannot cause a
                # ZeroDivisionError in the modulo.
                if log_frequency > 0 and i % log_frequency == 0:
                    logging.info("{0}/{1}".format(i, work_count))
                ata_piece[starti - start:stopi - start, :] = np.dot(
                    slicei.T, base_slice)
    else:
        # The return value is ignored; presumably ``mmultfile_atax`` fills
        # ``ata_piece`` in place -- confirm against its implementation.
        mmultfile_atax(a_filename,
                       a.offset,
                       a.iid_count,
                       a.sid_count,
                       work_index,
                       work_count,
                       ata_piece,
                       num_threads=get_num_threads(),
                       log_frequency=log_frequency)

    if log_frequency > 0:
        logging.info("ata_piece {0} of {1}: clocktime {2}".format(
            work_index, work_count, format_delta(time.time() - t0_gtg)))
    return ata_piece
Exemple #2
0
        idx = G0_memmap.pos[:,0]!=2
        inonzero = np.array([True]*sum(idx))
        inonzero[0] = False
        logging.info("Generating random {0}x{0}".format(sum(idx)))
        if sum(idx) < 10000:
            np.random.seed(0)
            SVinv3 = np.random.random((sum(idx),sum(idx)))
        else:
            SVinv3 = np.zeros((sum(idx),sum(idx)))
            np.fill_diagonal(SVinv3,val=1)
        logging.info("Done Generating random")
        local_fn_U = r"d:\deldir\local_fn_U.memmap"
        t0 = time.time()
        U_memmap = post_svd(local_fn_U, G0_memmap, idx, SVinv3, inonzero, memory_factor, runner, do_original=do_original,force_python_only=force_python_only,log_frequency=log_frequency)
        print(U_memmap.val)
        logging.info("clocktime {0}".format(format_delta(time.time()-t0)))
        print("done")
        


    elif test_ata:
        if do_fast:
            filename = r"D:\deldir\test\_Storage\very_small_0.8\G0_data.memmap"
        else:
            filename = r"D:\deldir\scratch\escience\carlk\cachebio\genetics\onemil\fc\bigsyn0\U1.memmap"

        work_count = 3
        force_python_only = False

        a = SnpMemMap(filename)
        def memmap_lambda():
Exemple #3
0
    def _simple_open_read(self, simple_file_name, updater=None):
        """Yield the path of a local copy of ``simple_file_name``.

        This is a generator that yields exactly once. If a usable local copy
        already exists it is yielded directly. Otherwise the method waits for
        its turn via ``_DibLib``, copies the file from the first source in
        ``self._far_file_sequence(dir_path)`` that works, registers the copy
        with ``dir_path.save``, and yields the local path.

        :param simple_file_name: Name of the file, relative to
            ``self.common_directory`` and to the local root.
        :param updater: Not used by this method.
        :return: Generator yielding the local file path (a string).
        :raises AssertionError: If a size check against the main file fails,
            or if ``main.txt`` is missing after the caller resumes the
            generator.
        """
        logging.debug("open_read('{0}')".format(simple_file_name))
        #
        # Returns name of local file and locks that local until released
        #

        # Try to get locally, return it if that worked.
        # Is there enough room? If not delete some files. If still not, fail.
        # Ask origin for randomized list of copies (the last one will be the
        # origin, but not all have to be included).
        # Try getting each copy. If all fail, then fail.
        # Register local copy with the origin.
        dir_path = self.common_directory.join(simple_file_name)
        unique_name, root = self.id_and_path_function()
        local_path = root + "/" + simple_file_name
        pstutil.create_directory_if_necessary(local_path, isfile=True)
        copy_name = "copy_{0}.txt".format(unique_name)
        main_path = self._robust_load_main(dir_path)
        file_size = os.path.getsize(main_path)

        if os.path.exists(local_path):
            if main_path == local_path or dir_path.file_exists(copy_name):
                logging.info("\tfound local")
                assert file_size == os.path.getsize(
                    local_path
                ), "Local file doesn't have the same size as the main file"
                yield local_path
                return
            else:
                # A local file exists but is not registered as a copy.
                logging.info("\tlost local found. Will remove")
                os.remove(local_path)
                pstutil.create_directory_if_necessary(local_path, isfile=True)

        dib_lib = _DibLib(unique_name, dir_path, dir_path, "dibs")
        try:
            dib_lib.wait_for_turn()

            # Now we can copy. Choose a source at random.
            # In a loop because in the future may want to handle copies
            # being deleted.
            for storage_path, storage_count in self._far_file_sequence(
                    dir_path):
                try:  # If something goes wrong, try the next one
                    self._net_use(storage_path)
                    assert file_size == os.path.getsize(
                        storage_path
                    ), "File to copy from doesn't have the same size as the main file"
                    if psutil.disk_usage(
                            os.path.dirname(local_path)
                    ).free - file_size < self.leave_space:
                        # Clean up the directory: check every local file and
                        # if it is not in the directory (e.g. a temp file
                        # from other programs), remove it.
                        local_dir = os.path.split(local_path)[0]
                        for other_file in os.listdir(local_dir):
                            full_file = local_dir + "/" + other_file
                            if os.path.isfile(
                                    full_file
                            ) and not self._simple_file_exists(other_file):
                                logging.info(
                                    "\tNeed space and file isn't in directory so removing it. '{0}'"
                                    .format(full_file))
                                os.remove(full_file)
                    logging.info("\tshutil.copyfile('{0}','{1}')".format(
                        storage_path, local_path))
                    then = datetime.datetime.now()
                    shutil.copyfile(storage_path, local_path)
                    shutil.copystat(storage_path, local_path)
                    self._copy_time_stamp(storage_path, local_path)
                    dir_path.save(copy_name, local_path)
                    # Clamp to at least one second so the speed calculation
                    # never sees a zero duration.
                    delta_sec = max(
                        (datetime.datetime.now() - then).total_seconds(), 1.)
                    try:
                        # Both placeholders need a value; with a single
                        # argument ``format`` raised IndexError every time
                        # and the speed was never reported.
                        logging.info(
                            "Copy time is {0}. Copy speed is {1} Mbps".format(
                                format_delta(delta_sec),
                                _mbps(file_size, delta_sec)))
                    except Exception:
                        # Best effort only: a logging problem must not make
                        # a successful copy look like a failure.
                        logging.info(
                            "Copy time is {0}. Copy speed can't be calculated Mbps"
                            .format(format_delta(delta_sec)))
                    break
                except Exception as e:
                    if os.path.exists(local_path):
                        logging.warning(
                            "If a local file was created, but something went wrong (e.g. the source disappeared part way through the copying), so we remove it"
                        )
                        os.remove(local_path)
                    logging.warning("Ignore exception {0}".format(e))
            assert file_size == os.path.getsize(
                local_path), "File did not copy or did not copy completely."

        finally:
            dib_lib.remove_dibs()

        yield local_path

        # Runs when the caller resumes the generator after using the file.
        # The message previously formatted an undefined name, which would
        # have raised NameError instead of this AssertionError.
        assert dir_path._simple_file_exists(
            "main.txt"), "File doesn't exist: '{0}'".format(simple_file_name)
Exemple #4
0
def mmultfile_ata_piece(a_filename,
                        offset,
                        work_index=0,
                        work_count=1,
                        log_frequency=-1,
                        force_python_only=False):
    """Return the ``work_index``-th column batch of ``A.T * A``.

    ``A`` is opened through ``SnpMemMap(a_filename)`` and its ``sid_count``
    columns are split into ``work_count`` contiguous batches. The result is
    a C-ordered array with ``a.sid_count - start`` rows and ``stop - start``
    columns, where ``start``/``stop`` bound the requested batch.

    With ``force_python_only`` the product is built with NumPy from data
    read directly from ``a.filename``; otherwise ``file_dot_piece`` is
    called with the output array. ``offset`` is accepted but not read here
    (``a.offset`` is used). Progress and timing are logged only when
    ``log_frequency`` is greater than zero.
    """
    started_at = time.time()
    logging_on = log_frequency > 0
    if logging_on:
        logging.info("ata_piece: Working on piece {0} of {1}.".format(
            work_index, work_count))

    a = SnpMemMap(a_filename)

    def batch_begin(batch_index):
        # First column belonging to the given batch.
        return a.sid_count * batch_index // work_count

    start = batch_begin(work_index)
    stop = batch_begin(work_index + 1)
    out_shape = (a.sid_count - start, stop - start)

    ata_piece = np.zeros(out_shape, order='C')

    # Debug switch: run both implementations and compare their results.
    cross_check = False

    if force_python_only or cross_check:

        def read_columns(handle, column_count):
            # Pull ``column_count`` whole columns of float64 values from the
            # current file position, laid out column after column.
            flat = np.fromfile(handle,
                               dtype=np.float64,
                               count=a.iid_count * column_count)
            return flat.reshape(a.iid_count, column_count, order="F")

        with open(a.filename, "rb") as fp:
            fp.seek(a.offset + start * a.iid_count * 8)
            own_block = read_columns(fp, stop - start)
            for batch in range(work_index, work_count):
                lo = batch_begin(batch)
                hi = batch_begin(batch + 1)
                # The first batch is the block already in memory; later
                # batches follow it sequentially in the file.
                if batch > work_index:
                    other_block = read_columns(fp, hi - lo)
                else:
                    other_block = own_block
                if logging_on and batch % log_frequency == 0:
                    logging.info("{0}/{1}".format(batch, work_count))
                product = np.dot(other_block.T, own_block)
                ata_piece[lo - start:hi - start, :] = product

        if cross_check:
            # Keep the NumPy answer and give the other path a fresh buffer.
            ata_piece_python = ata_piece
            ata_piece = np.zeros(out_shape, order='C')

    if cross_check or not force_python_only:
        file_dot_piece(str(a_filename),
                       a.offset,
                       a.iid_count,
                       start,
                       ata_piece,
                       num_threads=get_num_threads(None),
                       log_frequency=log_frequency)

    if cross_check:
        largest_gap = np.abs(ata_piece_python - ata_piece).max()
        if not largest_gap < 1e-12:
            raise AssertionError(
                "Expect Python and Rust to get the same file_dot answer")

    if logging_on:
        elapsed = format_delta(time.time() - started_at)
        logging.info("ata_piece {0} of {1}: clocktime {2}".format(
            work_index, work_count, elapsed))
    return ata_piece