Example #1
    def _create_center_line(self):

        # Isolate the center boxes for the center line
        center_line = [box for box in self.box_names if box.split("-")[0] == "0"]

        # Select the center line objects and then duplicate and merge them to create the lines
        for line in center_line:
            obj = bpy.data.objects[line]
            obj.select_set(True)

        bpy.ops.object.duplicate_move(OBJECT_OT_duplicate={"linked": False, "mode": 'TRANSLATION'})
        bpy.ops.object.join()

        # Select the temp object, then chunk the y coords of the verts into 8s due to the inset face
        temp_obj = bpy.context.selected_objects[0]
        cords = chunk_list(sorted([v.co[1] for v in temp_obj.data.vertices]), 8)

        # Create the vert list
        vert_list = []
        for i, v in enumerate(cords):
            if i > 0:
                vert_list.append([(0 - (self.line_width / 2), max(cords[i - 1]), -self.line_width),
                                  (0 - (self.line_width / 2), min(v), -self.line_width),
                                  (0 + (self.line_width / 2), min(v), -self.line_width),
                                  (0 + (self.line_width / 2), max(cords[i - 1]), -self.line_width)])

        # Create the center line object, set the origin to the location of the temp object then delete it
        # Each face is a quad built from four consecutive vertices
        face_list = chunk_list([i for i in range(len(vert_list) * 4)], 4)
        box_obj, mesh = make_mesh("TestJoin", self.box_colour)
        mesh.from_pydata(flatten(vert_list), [], face_list)
        box_obj.location = temp_obj.location
        bpy.ops.object.delete()
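
The make_mesh helper called above is not shown on this page. A minimal sketch of what it plausibly does, assuming it creates a fresh Blender mesh/object pair, links the object into the active collection and assigns a flat material of the given colour (the material handling is an assumption, not confirmed by the source):

import bpy

def make_mesh(name, colour):
    # Hypothetical helper: create an empty mesh and wrap it in an object
    mesh = bpy.data.meshes.new(name)
    obj = bpy.data.objects.new(name, mesh)

    # Link the object into the active collection so it appears in the scene
    bpy.context.collection.objects.link(obj)

    # Assumption: colour is an RGBA tuple such as (0.8, 0.1, 0.1, 1.0)
    material = bpy.data.materials.new(f"{name}_material")
    material.diffuse_color = colour
    mesh.materials.append(material)

    return obj, mesh
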
    def _position_values(self):
        csv_data = CsvObject(self.csv_path, [str, float, float, float])

        if csv_data.row_length != 4:
            msg = f"Csv file should contain phenotype, coefficient, lower bound, upper bound yet found" \
                  f" {csv_data.row_length} rows"
            raise IndexError(msg)

        # Normalise the values for the table plot, with the axis target appended so we know where to draw the axis
        numerical_values = flatten([row[1:] for row in csv_data.row_data])
        normalised_value_list = normalisation_min_max(numerical_values +
                                                      [self.axis_target])

        # Isolate the axis and normal array, then chunk the normal array back into the coefficient, lower bound and
        # upper bound
        x_axis_point = normalised_value_list[-1]
        normal_array = chunk_list(normalised_value_list[:-1], 3)

        # Format the rows so that each numeric value is paired with its normalised (positional) value
        formatted_rows = []
        for row, normalised in zip(csv_data.row_data, normal_array):
            formatted_rows.append(
                flatten([[row[0]]] + [[row[i + 1], normalised[i]]
                                      for i in range(3)]))
        return formatted_rows, x_axis_point
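
These examples lean on a handful of list utilities (chunk_list, flatten, normalisation_min_max) that are imported from elsewhere and not shown here. A minimal sketch of the behaviour the calls above appear to assume, namely fixed-size chunking, single-level flattening and min-max scaling to [0, 1]:

def chunk_list(values, chunk_size):
    # Split a list into consecutive pieces of at most chunk_size elements
    return [values[i:i + chunk_size] for i in range(0, len(values), chunk_size)]

def flatten(list_of_lists):
    # Remove one level of nesting, e.g. [[1, 2], [3]] -> [1, 2, 3]
    return [item for sub_list in list_of_lists for item in sub_list]

def normalisation_min_max(values):
    # Rescale values into [0, 1] via (x - min) / (max - min)
    minimum, maximum = min(values), max(values)
    return [(value - minimum) / (maximum - minimum) for value in values]
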
Example #3
    def residual_gwas(self):
        """
        Create genetic residuals by regressing your covariates on the snp, or run a more traditional gwas of

        phenotype ~ dosage + covariate_1 + ... + covariate_N

        :return: Nothing, writes a line to file when residuals have been estimated
        :rtype: None
        """
        # Isolate which snps are to be used
        snp_ids = self._select_snps()
        snp_chunk_list = chunk_list(snp_ids[self.start_index:], self.iter_size)

        for chunk_id, snp_chunk in enumerate(snp_chunk_list, 1):
            self.logger.write(f"Chunk {chunk_id} of {len(snp_chunk_list)}")

            # Instance the memory for all individuals (:) for snp i
            current_snps = self.gen[:, snp_chunk]

            # Transform bgen dosage of [0, 1, 0] -> 0, 1, or 2 respectively.
            genotype_classes = current_snps.read(dtype=np.int8).val.T
            dosage = sum(np.array([snp * i for i, snp in enumerate(genotype_classes)], dtype=np.int8))
            self.logger.write(f"Loaded Chunk {chunk_id}: {terminal_time()}")

            # Isolate the snp names, and use them to create a dataframe of the dosage data
            snp_names = [snp.split(",")[1] for snp in current_snps.sid]
            snp_df = pd.DataFrame(dosage).T
            snp_df.columns = snp_names

            # Create a new dataframe from the merge of the snp data on IID to the master df
            snp_df = pd.concat([self.genetic_iid, snp_df], axis=1)
            df = self.df.merge(snp_df, left_on="IID", right_on="IID")

            # Run the regressions for each snp in this chunk
            for i, snp in enumerate(snp_names):
                self.model_regressions(df, i, snp, snp_chunk)
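
The dosage step in residual_gwas collapses the per-genotype probability rows into expected allele counts by weighting genotype class i by i and summing. A small standalone illustration of the same arithmetic on hard-called genotypes (the toy array below is an assumption for demonstration, not data from the source):

import numpy as np

# One probability triplet per individual; [0, 1, 0] marks the heterozygote
genotype_probabilities = np.array([[1, 0, 0],   # 0 copies of the allele
                                   [0, 1, 0],   # 1 copy
                                   [0, 0, 1]],  # 2 copies
                                  dtype=np.int8)

# Transpose so each row holds one genotype class, weight row i by i, then sum
dosage = sum(row * i for i, row in enumerate(genotype_probabilities.T))
print(dosage)  # -> [0 1 2]
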
    def relational_database(self, cleaned_directory, write_directory):
        """
        Create a json file for each place that contains all the information across time from the standardised data, via
        sub-processing to speed up the process. Loads the names from the PlaceReference file set to _matcher

        At present we have multiple files, one file for each entry of data. This method will standardise all the data
        from a given place into a single dict so that we can easily combine multiple datasets.

        Note
        ----
        This is normally quite a slow process, hence the multi-core functionality, but note that it will NOT overwrite
        existing files. This is so you can fix smaller areas without having to regenerate the whole list. Keep in mind,
        therefore, that if you wish to re-run the program you will need an empty write_directory.

        :param cleaned_directory: The files that have been standardised and then checked for ambiguity
        :type cleaned_directory: Path | str

        :param write_directory: The output directory
        :type write_directory: Path | str

        :return: Nothing, write each json file out and then stop
        :rtype: None
        """

        chunked = chunk_list([i for i in range(len(self._matcher))],
                             int(len(self._matcher) / self._cpu_cores))
        processes = [
            Process(target=self.relational_subprocess,
                    args=(index_list, index, cleaned_directory,
                          write_directory))
            for index, index_list in enumerate(chunked, 1)
        ]

        for process in processes:
            process.start()

        for process in processes:
            process.join()
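
Note that chunk_list is called here with a chunk size of len(self._matcher) / self._cpu_cores, so when that division leaves a remainder you end up with one extra, smaller chunk and therefore one more Process than there are cores. A quick illustration using the chunk_list sketch above (the numbers are made up):

# 10 places split over 4 cores -> chunk size 2 -> 5 chunks, i.e. 5 processes
print(chunk_list(list(range(10)), int(10 / 4)))
# [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
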
Example #5
def main_call(out_dir, write_dir, headers):

    output = []
    for file in directory_iterator(out_dir):
        if ".log" not in file:
            csv_file = CsvObject(Path(out_dir, file))

            # Isolate the model values from the aggregated [snp] + [model 1, ... model N]
            for row in csv_file.row_data:
                snp, models = row[0], chunk_list(row[1:], len(headers))
                output.append([snp, models])

    print(f"For {len(output)} Snps")
    model_count = len(output[0][1])

    model_comp = []
    for i in range(model_count):
        print(f"For model {i+1}")

        # Write out the aggregated chromosome model data to a directory
        model_out = []
        for snp, model in output:
            model_out.append([snp] + model[i])
        write_csv(write_dir, f"Model{i + 1}", ["Snp"] + headers, model_out)

        # Append the comparison to a master list of models
        model_comp.append([f"Model {i+1}"] + [
            str(np.mean([float(values[vi]) for values in model_out]))
            for vi in range(1, 4)
        ])

    # Write the model comp out
    write_csv(
        write_dir, "Model Comparision",
        ["Model", "Mean Coefficent", "Mean Standard Error", "Mean P Values"],
        model_comp)
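
The inner loop of main_call assumes each aggregated row is laid out as the snp id followed by N model blocks of len(headers) values each. A short worked example of that chunking, using the chunk_list sketch above and made-up numbers:

headers = ["Coefficient", "Standard Error", "P Value"]
row = ["rs123", "0.12", "0.05", "0.02", "0.10", "0.04", "0.03"]

snp, models = row[0], chunk_list(row[1:], len(headers))
print(snp)     # rs123
print(models)  # [['0.12', '0.05', '0.02'], ['0.10', '0.04', '0.03']]
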
root = r"I:\Work\DataBases\Adjacent\Months"

# Find the csv files that do not yet have a full set of rendered figures
not_processed = []
for file in directory_iterator(root):
    print(file)

    a = CsvObject(Path(root, file))

    # Expected number of figures for this file
    target = len(a.headers) - 2

    # Count how many figures have already been rendered for it
    found = 0
    for img in directory_iterator(r"I:\Work\Figures_and_tables\DiseasesOverTime"):
        year = img.split("_")[-1].split(".")[0]

        if year == Path(root, file).stem:
            found += 1

    if found != target:
        not_processed.append(file)

print(not_processed)
print(len(not_processed))


# Split the remaining files into four chunks; this run only works through the first chunk
file_list = chunk_list(not_processed, int(len(not_processed) / 4))
for file in file_list[0]:
    print(file)
    CreateFrames([Path(root, file),
                  r"I:\Work\Figures_and_tables\DiseasesOverTime",
                  2, 0])