Example #1
import difflib
import re

from Bio import pairwise2

from helpers.logger import init_logger

logger = init_logger("Annotation")


def find_edit_operations(seq1, seq2):
    matcher = difflib.SequenceMatcher(a=seq1, b=seq2)
    return matcher.get_opcodes()


def alteration_list_to_transcrit_mutation(g_test, g_ref):
    annotated_alterations = []
    for i_alteration in range(0, len(g_test.significant_alteration_list)):
        # TODO replace the Biopython global alignment with Python's difflib
        # TODO allow multiple mutations to be returned as non-distinguishable, i.e. reported as being in the same group
        is_multi = False
        curr_alteration = g_test.significant_alteration_list[i_alteration]
        ref_seq = curr_alteration.reference_sequence
        alt_seq = curr_alteration.alternative_sequence
        # logger.info("Will perform alignment between \n %s \n %s", ref_seq, alt_seq)
        alignments = pairwise2.align.globalms(ref_seq, alt_seq, 2, -3, -5, -2)
        # if more than one alignment, keep the one that places the alteration leftmost in the genome
        if len(alignments) > 1:
            # logger.critical("More than one alignment for %s vs %s", g_test.significant_alteration_list[i_alteration].reference_sequence,g_test.significant_alteration_list[i_alteration].alternative_sequence)
            alignments = [alignments[0]]
        uncompact_cigar = ""
import math
import time
from threading import Thread

from helpers.logger import init_logger

logger = init_logger("Weather", False)


def clamp(value, minimum=0.0, maximum=100.0):
    return max(minimum, min(value, maximum))


class Sun(object):
    def __init__(self, azimuth, altitude):
        self.azimuth = azimuth
        self.altitude = altitude
        self._t = 0.0

    def tick(self, delta_seconds):
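        # advance the internal clock; azimuth sweeps 0.25 deg/s, altitude oscillates within [-90, 50] deg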
        self._t += 0.008 * delta_seconds
        self._t %= 2.0 * math.pi
        self.azimuth += 0.25 * delta_seconds
        self.azimuth %= 360.0
        self.altitude = (70 * math.sin(self._t)) - 20

    def __str__(self):
        return 'Sun(alt: %.2f, azm: %.2f)' % (self.altitude, self.azimuth)


class Storm(object):
Example #3
#!/usr/bin/env python
# coding=utf8
import collections

import os
import re
import random
import msgpack
import time
import glob
import sys
from helpers.helpers import time_iterator, get_or_create_dir
from helpers.logger import init_logger

logger = init_logger('SEQLIB')

experiment_name = "TP53"
logger.info("Setting up SEQLIB for experiment %s",experiment_name)

def build_read_library(FASTQFILE_PATH):
	pattern = re.compile(r'([NC])_(\d+)_(\d+)')
	read_library = {'N': collections.defaultdict(set), 'C': collections.defaultdict(set)}
	FASTQFILE_ALL = os.listdir(FASTQFILE_PATH)
	logger.info("Found %d fastq file to process", len(FASTQFILE_ALL))
	for j, a_fastq_file in time_iterator(FASTQFILE_ALL, logger, msg_prefix="Building read library"):
		if a_fastq_file == ".DS_Store":
			continue
		match = pattern.search(a_fastq_file)
		fragment = match.group(1)
		individu = match.group(2)
		fastq = open(FASTQFILE_PATH + "/" + a_fastq_file, 'r')
		lines = fastq.readlines()
Example #4
import carla
import tensorflow as tf

# let TensorFlow allocate GPU memory on demand instead of reserving it all upfront
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
  except RuntimeError:
    # memory growth must be configured before the GPUs are initialized
    pass

import settings
from helpers.environment_control import start_carla, terminate_carla
from helpers.logger import init_logger
from modules.agent import Agent
from modules.modelhandler import ModelHandler
from modules.weather_control import WeatherControlThread
from modules.trafic_control import TraficControlThread

logger = init_logger("Play", False)

MODEL_WEIGHTS = ""

def play():
  client = carla.Client(settings.CONNECTION_IP, settings.CONNECTION_PORT)
  client.set_timeout(20.0)

  # Create controllers
  trafic_control = TraficControlThread(client)
  weather_control = WeatherControlThread(client)
  trafic_control.start()
  weather_control.start()
  logger.info("Controllers started")

  predicter = ModelHandler(settings.MODEL_NAME, target_weights_path=MODEL_WEIGHTS, train=False)
Example #5
import carla
import time
import atexit
import sys
import os
import subprocess
import psutil

import settings
from helpers.logger import init_logger

logger = init_logger("ENV Controll", False)

def operating_system():
  logger.debug(os.name)
  return 'windows' if os.name == 'nt' else 'linux'

def get_binary():
  return 'CarlaUE4.exe' if operating_system() == 'windows' else 'CarlaUE4.sh'

def get_exec_command():
  binary = get_binary()
  exec_command = binary if operating_system() == 'windows' else ('./' + binary)

  return binary, exec_command

def terminate_process(binary):
  for process in psutil.process_iter():
    name = process.name().lower()
    if name.startswith(binary.split('.')[0].lower()) or name.startswith(binary.lower()):
      try:
        process.terminate()
Example #6
import subprocess
import time
import settings
from helpers.logger import init_logger

logger = init_logger("Infinite trainer", False)

num_of_restarts = 0

if __name__ == '__main__':
    tbp = None
    if settings.START_TENSORBOARD_ON_TRAINING:
        tbp = subprocess.Popen(
            f"./venv/Scripts/python.exe -m tensorboard.main --logdir logs",
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)

    p = subprocess.Popen("./venv/Scripts/python.exe train.py")
    while True:
        try:
            p.wait()
            logger.info("Restarting environment in 5s")
            time.sleep(5)
            p = subprocess.Popen("./venv/Scripts/python.exe train.py")
            num_of_restarts += 1
        except KeyboardInterrupt:
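            # CTRL_C_EVENT is Windows-only, consistent with the venv/Scripts paths used above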
            p.send_signal(subprocess.signal.CTRL_C_EVENT)
            logger.info("Training interrupted")
            break
        except Exception as e:
            logger.error(f"Infinite loop exception\n{e}")
from argparse import ArgumentParser
import os
import pprint as pp
import collections
from Bio import SeqIO
from Bio.Alphabet import generic_dna
import Levenshtein
import sys
from helpers.logger import init_logger

__author__ = "hayssam"

logger = init_logger("Reads pre-processor")


def identify_primer_with_mismatches(seq, fw, rev, max_mismatch=8):
    for primer in fw:
        d = Levenshtein.distance(seq[: len(primer)], primer)
        if d < max_mismatch:
            return +1, primer
    for primer in rev:
        d = Levenshtein.distance(seq[: len(primer)], primer)
        if d < max_mismatch:
            return -1, primer
    return None, None


def identify_primer(seq, fw, rev):
    for primer in fw:
        if seq.startswith(primer):
            return +1, primer
    for primer in rev:
        if seq.startswith(primer):
            return -1, primer
    return None, None
Example #8
import tensorflow as tf
from tensorflow.keras.callbacks import Callback
import tensorflow.keras.backend as K

from helpers.logger import init_logger

logger = init_logger("Tensorboard", False)


class TensorBoardCustom(Callback):
    def __init__(self, log_dir):
        super().__init__()
        self.step = 0
        self.log_dir = log_dir
        self.writer = None

    def __del__(self):
        try:
            if self.writer:
                self.writer.close()
        except:
            pass

    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(self.step, **(logs or {}))

    def init_writer_check(self):
        if not self.writer:
            self.writer = tf.summary.create_file_writer(self.log_dir)

    def log_weights(self, model: tf.keras.Model):
Example #9
from collections import defaultdict
import os

import pandas as pd
from helpers.helpers import time_iterator
from helpers.logger import init_logger

pd.set_option('display.width', 250)
import random
import itertools
import simplejson
import binascii

__author__ = 'hayssam'

logger = init_logger("POSPROCESS")


def hash_dict(d):
	return hash(tuple(sorted(d.items())))


# @@@

# find closest alterations

def decorate_alt(alt, origin):
	d = {"origin": origin, "start": alt['start'], "alt_type": alt['alt_type'], "len": alt['alt_length'], "sequence": alt["alt_content"]}
	d['hash'] = hash_dict(d)
	return d
Example #10
import time
import carla

import settings
from helpers.environment_control import start_carla, terminate_carla
from modules.weather_control import WeatherControlThread
from modules.trafic_control import TraficControlThread
from helpers.logger import init_logger

logger = init_logger("Pure Environment", False)


def handle_environment():
    client = carla.Client(settings.CONNECTION_IP, settings.CONNECTION_PORT)
    client.set_timeout(20.0)

    logger.info("Starting environment controllers")
    trafic_control = TraficControlThread(client)
    weather_control = WeatherControlThread(client)
    trafic_control.start()
    weather_control.start()
    logger.info("Controllers started")

    try:
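        # idle loop: poll the world every 20 s (presumably to keep the client session alive) until interrupted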
        while True:
            client.get_world()
            time.sleep(20)
    except KeyboardInterrupt:
        logger.info("Environment exited")
    except Exception:
        logger.exception("Environment failed")
Example #11
import sys
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Conv2D, AveragePooling2D, Flatten, Concatenate, Dropout, Activation
from tensorflow.keras.optimizers import Adam, SGD, Adadelta
from tensorflow.keras.initializers import RandomNormal

import settings
from helpers.logger import init_logger

logger = init_logger("Models", False)

kernel_initializer = RandomNormal(stddev=0.02)

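# swap to Keras' (height, width, channels) order; CAMERA_IMAGE_DIMENSIONS holds (width, height, channels)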
image_shape = (settings.CAMERA_IMAGE_DIMENSIONS[1],
               settings.CAMERA_IMAGE_DIMENSIONS[0],
               settings.CAMERA_IMAGE_DIMENSIONS[2])
front_camera_input = Input(name="front_camera_input", shape=image_shape)
right_camera_input = Input(name="right_camera_input", shape=image_shape)
left_camera_input = Input(name="left_camera_input", shape=image_shape)
# noinspection PyTypeChecker
combined_cameras_input = Concatenate(name="combine_cameras_concatenate")(
    [front_camera_input, left_camera_input, right_camera_input])


# noinspection PyTypeChecker
def CNN_5_residual():
    cnn_1 = Conv2D(
        64, (7, 7), padding='same',
        kernel_initializer=kernel_initializer)(combined_cameras_input)
    cnn_1a = Activation('relu')(cnn_1)
    cnn_1c = Concatenate()([cnn_1a, combined_cameras_input])
Example #12
import time
import carla
import random
from threading import Thread

from helpers.logger import init_logger
import settings

logger = init_logger("Trafic", False)


class TraficControlThread(Thread):
    def __init__(self, client):
        super().__init__()
        self.client = client
        self.daemon = True

        self.traffic_manager = self.client.get_trafficmanager()
        self.traffic_manager.set_global_distance_to_leading_vehicle(2.0)

        self.world = self.client.get_world()
        self.spawn_points = self.world.get_map().get_spawn_points()

        self.blueprints = self.world.get_blueprint_library()

        self.vehicle_blueprints = self.blueprints.filter('vehicle.*')
        self.vehicle_blueprints = [
            x for x in self.vehicle_blueprints
            if int(x.get_attribute('number_of_wheels')) == 4
        ]
        self.vehicle_blueprints = [
Example #13
#!/usr/bin/env python
# coding=utf8
import collections
import networkx as nx
from helpers.logger import init_logger

logger = init_logger("RANDGRAPH")

cached_kmers = {}
cached_pairs = {}
KMER_UID = {}
curr_uid = 0
last_sample = None


def kmerize_iter(s, k):
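    # yield successive k-mers of s (note: the range stops at len(s) - k, so the final k-mer is skipped)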
    for i in range(0, len(s) - k):
        yield s[i:i + k]


class RandomReadsGraph:
    def __init__(self, coverage_dict, k, seq_lib_module, restrict_to=None):
        global cached_kmers, curr_uid, last_sample
        self.coverage_dict = coverage_dict
        # TODO optimize indexation
        self.kmer_map = collections.defaultdict(set)
        self.restrict_to = set(restrict_to) if restrict_to else None
        self.possible_pairs = set()
        read_list = seq_lib_module.sampling(self.coverage_dict)
        self.dbg = nx.DiGraph()
Example #14
#!/usr/bin/env python
# coding=utf8
import collections

import os
import re
import random
import msgpack
import time
import glob
import sys
from helpers.helpers import time_iterator, get_or_create_dir
from helpers.logger import init_logger

logger = init_logger('SEQLIB')
logger.info("Setting up SEQLIB")


# For an allowed mismatch rate during cutting = 0.1
FASTQFILE_PATH = "data/fastq/all_pool_trimmed0.1"


def build_read_library():
	pattern = re.compile(r'([NC])_(\d+)_(\d+)')
	read_library = {'N': collections.defaultdict(set), 'C': collections.defaultdict(set)}

	FASTQFILE_ALL = os.listdir(FASTQFILE_PATH)
	logger.info("Found %d fastq file to process", len(FASTQFILE_ALL))
	for j, a_fastq_file in time_iterator(FASTQFILE_ALL, logger, msg_prefix="Building read library"):
		if a_fastq_file == ".DS_Store":
			continue
Example #15
#!/usr/bin/env python
# coding=utf8
import re
from Bio import SeqIO
from Bio.Alphabet import generic_dna
import networkx as nx
from Bio import pairwise2
import collections
from helpers.logger import init_logger

from alteration import alteration as ALT

# from alteration import alteration as ALT
logger = init_logger("Patient graph")

pattern = re.compile(r'.+/(\w+)')  # capture the fastq id (basename without extension) from a path


class PatientGraph:
	def __init__(self, fastq_files, kmer_length):
		self.coverage = {}
		self.coverage['total'] = 0
		self.alteration_list = []
		self.dbg = nx.DiGraph()
		self.dbgclean = None
		self.n_reads = 0
		self.kmer_start_set = set()
		self.kmer_end_set = set()

		for f in fastq_files:
			fastq_id = pattern.search(f).group(1)
Example #16
from collections import defaultdict
import os

import pandas as pd
from helpers.helpers import time_iterator
from helpers.logger import init_logger

pd.set_option('display.width', 250)
import random
import itertools
import simplejson
import binascii

__author__ = 'hayssam'

logger = init_logger("POSTPROCESS")


def hash_dict(d):
    return hash(tuple(sorted(d.items())))


# @@@

# find closest alterations


def decorate_alt(alt, origin):
    d = {
        "origin": origin,
        "start": alt['start'],
        "alt_type": alt['alt_type'],
        "len": alt['alt_length'],
        "sequence": alt["alt_content"],
    }
    d['hash'] = hash_dict(d)
    return d
Example #17
from helpers.helpers import time_iterator, get_or_create_dir
from helpers.logger import init_logger
from Bio import pairwise2
logger = init_logger('Annotation')

def alteration_list_to_transcrit_mutation(g_test, g_ref):
	for i_alteration in range(0, len(g_test.significant_alteration_list)):
		alignments = pairwise2.align.globalms(g_test.significant_alteration_list[i_alteration].reference_sequence, g_test.significant_alteration_list[i_alteration].alternative_sequence, 2, -3, -5, -2)
		# if more than one alignment, keep the one that places the alteration leftmost in the genome
		if len(alignments) > 1:
			# logger.critical("More than one alignment for %s vs %s", g_test.significant_alteration_list[i_alteration].reference_sequence,g_test.significant_alteration_list[i_alteration].alternative_sequence) 
			alignments = [alignments[0]]
		uncompact_cigar = ""
		compact_cigard = []
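		# build a per-base edit string: M match, I insertion, D deletion, X mismatch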
		for i_nucleotide in range(0,alignments[0][4]):
			if alignments[0][0][i_nucleotide] == alignments[0][1][i_nucleotide] :
				uncompact_cigar += "M"
			elif alignments[0][0][i_nucleotide] == "-":
				uncompact_cigar += "I"
			elif alignments[0][1][i_nucleotide] == "-":
				uncompact_cigar += "D"
			else:
				uncompact_cigar += "X"
		# print uncompact_cigar
		operation = uncompact_cigar[0]
		count = 0
		for i_nucleotide in range(0,len(uncompact_cigar)):
			if uncompact_cigar[i_nucleotide] != operation:
				compact_cigard += [count,operation]
				operation = uncompact_cigar[i_nucleotide]
				count = 0
Example #18
import difflib
import re

from Bio import pairwise2
import itertools
from helpers import intset

from helpers.logger import init_logger

logger = init_logger('Annotation')


def find_edit_operations(seq1, seq2):
    matcher = difflib.SequenceMatcher(a=seq1, b=seq2)
    return matcher.get_opcodes()


def merge_identical_alterations(annotated_alterations):
    # identify groups having exactly the same start and end
    annotated_alterations.sort(key=lambda alt: (alt['start'], alt['end']))
    merged_alterations = []
    for g, alt_group in itertools.groupby(annotated_alterations,
                                          key=lambda alt:
                                          (alt['start'], alt['end'])):
        best_alt = max(list(alt_group), key=lambda alt: alt['alt_read_count'])
        merged_alterations.append(best_alt)
    # identify alterations belonging to the same interval
    all_ranges = [(x['start'], x['end']) for x in merged_alterations]
    logger.info("Will merge ranges: %s", all_ranges)
    int_sets = intset.IntSet(*all_ranges)
    normalized_ranges = [intset.IntSet(r) for r in int_sets._ranges]
Example #19
#!/usr/bin/env python
# coding=utf8

import json
from Bio import pairwise2

import networkx as nx
from argparse import ArgumentParser
import time
from forannotation import find_edit_operations
from helpers.helpers import time_iterator, get_or_create_dir, get_timestamp, get_git_revision_hash
from helpers.logger import init_logger
import sys

logger = init_logger('MICADo')

## imports
logger.info("Will import")
from reference_graph import ReferenceGraph as RG
from patient_graph import PatientGraph as PG
from randomreadsgraph import RandomReadsGraph as RRG

logger.info("Import finished")


def process_sample(kmer_length,
                   min_support_percentage,
                   n_permutations,
                   p_value_threshold,
                   max_len,
                   sample_key=None,
Example #20
#!/usr/bin/env python
# coding=utf8
import re

from Bio import SeqIO
from Bio.Alphabet import generic_dna
import networkx as nx
import swalign

import collections
from alteration import alteration as ALT
from helpers.logger import init_logger


pattern = re.compile(r'([NC])_(\d+)_(\d+)')
logger = init_logger("individu graph")

## For SWA alignment
match = 2
mismatch = -1
scoring = swalign.NucleotideScoringMatrix(match, mismatch)
sw = swalign.LocalAlignment(scoring)


class IndividuGraph:
	def __init__(self, fastq_list, k):
		self.coverage = {}
		self.alteration_list = []
		self.dbg = nx.DiGraph()
		self.dbgclean = None
		self.kmer_start_set = set()
Example #21
import os
import random
from threading import Thread

import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError:
        # memory growth must be configured before the GPUs are initialized
        pass

import settings
from helpers.environment_control import start_carla, terminate_carla
from helpers.logger import init_logger
from modules.agent import Agent
from modules.weather_control import WeatherControlThread
from modules.trafic_control import TraficControlThread
from modules.modelhandler import ModelHandler

logger = init_logger("Training", False)

random.seed(settings.RANDOM_SEED)
tf.random.set_seed(settings.RANDOM_SEED)


def save_models(trainer, model_name, episode, epsilon, score):
    model_name = f"{model_name}__{episode}ep__{epsilon}eps__{score}score.h5"
    path = os.path.join(settings.MODEL_SAVE_PATH, model_name)

    logger.info(f"Saving new record model\n{model_name}")
    trainer.save_weights(path)


class Trainer(Thread):
    def __init__(self, client, identifier, epsilon, get_qs_callbatch,
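Example #22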
from argparse import ArgumentParser
import os
import pprint as pp
import collections
from Bio import SeqIO
from Bio.Alphabet import generic_dna
import Levenshtein
import sys
from helpers.logger import init_logger

__author__ = 'hayssam'

logger = init_logger('Reads pre-processor')


def identify_primer_with_mismatches(seq, fw, rev, max_mismatch=8):
	for primer in fw:
		d = Levenshtein.distance(seq[:len(primer)], primer)
		if d < max_mismatch:
			return +1, primer
	for primer in rev:
		d = Levenshtein.distance(seq[:len(primer)], primer)
		if d < max_mismatch:
			return -1, primer
	return None, None


def identify_primer(seq, fw, rev):
	for primer in fw:
		if seq.startswith(primer):
			return +1, primer
	for primer in rev:
		if seq.startswith(primer):
			return -1, primer
	return None, None
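Example #23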
from collections import deque
from typing import Union
import random
import time
import os
import numpy as np
from threading import Thread
from modules.custom_tensorboard import TensorBoardCustom
from tensorflow.keras.utils import plot_model
import gc

from modules.models import create_model
from helpers.logger import init_logger
import settings

logger = init_logger("ModelHandler", False)

action_normalizer_value = float(len(settings.ACTIONS) - 1)


class ModelHandler(Thread):
    def __init__(self,
                 model: str,
                 weights_path: str = None,
                 target_weights_path: str = None,
                 train: bool = True):
        super().__init__()
        self.daemon = True
        self.terminate = False
        self.__training = train
        self.__halt_training = False
Example #24
import itertools
import difflib
import os
import re

import simplejson as json
import pandas as pd

from helpers.helpers import time_iterator
from helpers.logger import init_logger

__author__ = 'hayssam'

logger = init_logger("GATKPPROC", {})


def hash_dict(d):
    return hash(tuple(sorted(d.items())))


def enrich_caller_record(a_rec, caller):
    res_dict = {'alt_sequence': a_rec['ALT'], 'ref_sequence': a_rec['REF']}
    res_dict['alt_length'] = abs(
        len(res_dict['alt_sequence']) - len(res_dict['ref_sequence']))

    matcher = difflib.SequenceMatcher(a=res_dict['ref_sequence'],
                                      b=res_dict['alt_sequence'])
    op = [x[0] for x in matcher.get_opcodes() if x[0] != 'equal']
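    # classify the alteration from the difflib opcodes: any insert -> "I", any delete -> "D"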
    if "insert" in op:
        res_dict['alt_type'] = "I"
    elif "delete" in op:
Example #25
import carla
from carla import ColorConverter as cc
import random
import math
import time
import numpy as np
import gc
import cv2

from helpers.logger import init_logger
import settings

logger = init_logger("Agent", False)


class Agent:
    def __init__(self, identifier: int, client, train: bool = True):
        self.client = client
        self._id = identifier

        self.world = self.client.get_world()
        self.blueprint_library = self.world.get_blueprint_library()
        self.vehicle_bp = self.blueprint_library.filter("model3")[0]

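        # RGB camera blueprint; resolution and field of view are taken from settings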
        self.camera_bp = self.blueprint_library.find("sensor.camera.rgb")
        self.camera_bp.set_attribute("image_size_x",
                                     f"{settings.CAMERA_IMAGE_DIMENSIONS[0]}")
        self.camera_bp.set_attribute("image_size_y",
                                     f"{settings.CAMERA_IMAGE_DIMENSIONS[1]}")
        self.camera_bp.set_attribute("fov", f"{settings.CAMERA_FOV}")
import simplejson as json
from helpers.helpers import time_iterator
from helpers.logger import init_logger

__author__ = 'hayssam'
import pandas as pd



# mode="SUPERVISED"
# XPDIR = "data/synthetic/"
# XPKEY = "synthetic"

mode = "UNSUPERVISED"
XPDIR = "data/tp53_analysis/"
XPKEY = "pool0"


logger = init_logger("RESULTPROCESSING[%s]"%(mode), {})
def hash_dict(d):
	return hash(tuple(sorted(d.items())))


def closest_alteration(alt, alt_list):
	if len(alt_list) < 1:
		return None
	closest = min(alt_list, key=lambda x: abs(x['start'] - alt['start']))

	return closest


def is_match(x, closest):
	if not closest:
		return False
Example #27
#!/usr/bin/env python
# coding=utf8

# python src/principal.py --samplekey 83_1 --fastq /Users/rudewicz/didac/DiDaC/data/fastq/all_pool_trimmed0.1/C_83_1.fastq,/Users/rudewicz/didac/DiDaC/data/fastq/all_pool_trimmed0.1/N_83_1.fastq --fasta /Users/rudewicz/didac/MICADo/data/reference/reference_TP53.fasta --snp /Users/rudewicz/didac/MICADo/data/reference/snp_TP53.tab  --kmer_length 20 --npermutations 100 --experiment TP53
import json
from Bio import pairwise2

import networkx as nx
from argparse import ArgumentParser
import time
from forannotation import find_edit_operations
from helpers.helpers import time_iterator, get_or_create_dir, get_timestamp, get_git_revision_hash
from helpers.logger import init_logger
import sys

logger = init_logger('MICADo')

## imports
logger.info("Will import")
from reference_graph import ReferenceGraph as RG
import visualization as VISU
from patient_graph import PatientGraph as PG

logger.info("Import finished")


def process_sample(kmer_length, min_support_percentage, n_permutations, p_value_threshold, sample_key=None, fastq_files=None, fasta_file=None, snp_file=None, experiment_name=None,
				   destination_directory=".", export_gml=False, output_results=None):
	if experiment_name == "TP53":
		from randomreadsgraph_TP53 import RandomReadsGraph as RRG
	else:
Example #28
#!/usr/bin/env python
# coding=utf8
# For one PCR amplicon
import collections
import networkx as nx
from helpers.logger import init_logger
import seq_lib as SL

logger = init_logger("RANDGRAPH")

class RandomReadsGraph:
	def __init__(self, coverage_dict, k, restrict_to=None):
		self.coverage = sum(coverage_dict.values())
		self.kmer_map = collections.defaultdict(set)
		# avoid a mutable default argument; an empty set keeps no k-mers, as before
		self.restrict_to = set(restrict_to) if restrict_to else set()
		self.possible_pairs = set()
		read_list = SL.sampling(self.coverage)
		self.dbg = nx.DiGraph()
		for i_read in range(0, len(read_list)):
			this_read = read_list[i_read]
			# keep only k-mers that already belong to the restriction set
			kmers = [this_read[i:i + k] for i in range(len(this_read) - k) if this_read[i:i + k] in self.restrict_to]
			kmer_pairs = [(f1, f2) for f1, f2 in zip(kmers, kmers[1:])]
			for kmer in kmers:
				self.kmer_map[kmer].add(i_read)
			self.possible_pairs.update(kmer_pairs)

	def build_read_set_for_path(self, a_path, verbose=False):
		# a_path=map(lambda x:KMER_UID[x],a_path)
		missing_kmers = set(a_path).difference(self.kmer_map)
		if len(missing_kmers):
			# logger.critical("Completely missing kmer (%d): %s", len(missing_kmers), missing_kmers)
Example #29
import random
import itertools
import pandas as pd
import time
import sys
from pyparsing import ParseException
from helpers import helpers
from helpers.logger import init_logger
from helpers.helpers import time_iterator, get_git_revision_hash
from read_sampler.cigar_parser import parse_cigar_string

pd.set_option('display.width', 250)

__author__ = 'hayssam'

logger = init_logger(name="READSAMPLER")


# map read coordinates onto reference coordinates by walking the CIGAR operations
def coordinate_map(an_alignment_row):
    global args
    range_accumulator = []
    cigar = an_alignment_row.CIGAR
    start_pos = an_alignment_row.POS + args.systematic_offset
    label = an_alignment_row.QNAME
    read_start_range = (0, 0)
    ref_start_range = (start_pos, start_pos)
    read_last_range = read_start_range
    ref_last_range = ref_start_range
    try:
        parse_results = parse_cigar_string(cigar)
Example #30
#!/usr/bin/env python
# coding=utf8
import re
from Bio import SeqIO
from Bio.Alphabet import generic_dna
import networkx as nx
from Bio import pairwise2
import collections
from helpers.logger import init_logger

from alteration import alteration as ALT

# from alteration import alteration as ALT
logger = init_logger("Patient graph")

pattern = re.compile(r'.+/(\w+)')  # capture the fastq id (basename without extension) from a path


class PatientGraph:
    def __init__(self, fastq_files, kmer_length):
        self.coverage = {}
        self.coverage['total'] = 0
        self.alteration_list = []
        self.dbg = nx.DiGraph()
        self.dbgclean = None
        self.n_reads = 0
        self.kmer_start_set = set()
        self.kmer_end_set = set()

        for f in fastq_files:
            fastq_id = pattern.search(f).group(1)
Example #31
import random
import itertools
import pandas as pd
import time
import sys
from pyparsing import ParseException
from helpers import helpers
from helpers.logger import init_logger
from helpers.helpers import time_iterator, get_git_revision_hash
from read_sampler.cigar_parser import parse_cigar_string

pd.set_option('display.width', 250)

__author__ = 'hayssam'

logger = init_logger(name="READSAMPLER")


# map read coordinates onto reference coordinates by walking the CIGAR operations
def coordinate_map(an_alignment_row):
	global args
	range_accumulator = []
	cigar = an_alignment_row.CIGAR
	start_pos = an_alignment_row.POS + args.systematic_offset
	label = an_alignment_row.QNAME
	read_start_range = (0, 0)
	ref_start_range = (start_pos, start_pos)
	read_last_range = read_start_range
	ref_last_range = ref_start_range
	try:
		parse_results = parse_cigar_string(cigar)
Example #32
#!/usr/bin/env python
# coding=utf8
## imports

import networkx as nx
from argparse import ArgumentParser
from helpers.helpers import time_iterator, get_or_create_dir
from helpers.logger import init_logger
import sys

logger = init_logger('PACBIOP53')

## parameters

## imports
logger.info("Will import")
import reference_graph as RG
import visualization as VISU
from individugraph import IndividuGraph as IG
from randomreadsgraph import RandomReadsGraph as RRG
import forannotation as ANNO

logger.info("Import finished")



def process_sample(kmer_length, min_support_percentage,  n_permutations, sample_key=None, c_fastq_file=None, n_fastq_file=None, destination_directory=".", export_gml=False):

	# g_ref construction
	logger.info("Will build reference graph with k==%d", kmer_length)
	g_ref = RG.ref_constructor(kmer_length)
Example #33
import simplejson as json
from helpers.helpers import time_iterator
from helpers.logger import init_logger

__author__ = 'hayssam'
import pandas as pd

# mode="SUPERVISED"
# XPDIR = "data/synthetic/"
# XPKEY = "synthetic"

mode = "UNSUPERVISED"
XPDIR = "data/tp53_analysis/"
XPKEY = "pool0"

logger = init_logger("RESULTPROCESSING[%s]" % (mode), {})


def hash_dict(d):
    return hash(tuple(sorted(d.items())))


def closest_alteration(alt, alt_list):
    if len(alt_list) < 1:
        return None
    closest = min(alt_list, key=lambda x: abs(x['start'] - alt['start']))

    return closest


def is_match(x, closest):