Ejemplo n.º 1
0
def create_squad_examples(raw_data, desc, dir_tokenizer):
    """Build preprocessed ``Sample`` objects from SQuAD-style JSON data.

    Args:
        raw_data: Parsed JSON dict with a top-level ``"data"`` list. Each
            entry has ``"paragraphs"``; each paragraph has a ``"context"``
            string and a ``"qas"`` list of question dicts.
        desc: Label shown on the progress bar.
        dir_tokenizer: Directory that contains the WordPiece ``vocab.txt``.

    Returns:
        List of ``Sample`` objects, each already run through ``preprocess()``.
    """
    tokenizer = BertWordPieceTokenizer(os.path.join(dir_tokenizer,
                                                    "vocab.txt"),
                                       lowercase=True)
    p_bar = tqdm(total=len(raw_data["data"]),
                 desc=desc,
                 position=0,
                 leave=True,
                 file=sys.stdout,
                 bar_format="{l_bar}%s{bar}%s{r_bar}" %
                 (Fore.BLUE, Fore.RESET))
    squad_examples = []
    try:
        for item in raw_data["data"]:
            for para in item["paragraphs"]:
                context = para["context"]
                for qa in para["qas"]:
                    question = qa["question"]
                    # An "answers" key may be present but empty; indexing
                    # [0] on it would raise IndexError, so such questions are
                    # built like questions that have no "answers" key at all.
                    answers = qa.get("answers")
                    if answers:
                        first_answer = answers[0]
                        squad_eg = Sample(tokenizer, question, context,
                                          first_answer["answer_start"],
                                          first_answer["text"],
                                          [ans["text"] for ans in answers])
                    else:
                        squad_eg = Sample(tokenizer, question, context)
                    squad_eg.preprocess()
                    squad_examples.append(squad_eg)
            # The bar counts top-level "data" entries, not questions.
            p_bar.update(1)
    finally:
        # Always release the progress bar, even if an example fails.
        p_bar.close()
    return squad_examples
def create_sample(db_path, instrument_node):
    """Create a ``Sample`` from the child elements of one instrument node.

    Args:
        db_path: Folder that the node's relative file path is appended to.
        instrument_node: Node supporting ``xpath``; its ``relativeFilepath``,
            ``sampleRate``, ``numChannels`` and ``length`` children are read.

    Returns:
        ``Sample(path, sample_rate, channels, duration)``.
    """

    def first_text(query):
        # Text content of the first node matched by the XPath query.
        return instrument_node.xpath(query)[0].text

    location = db_path + os.path.sep + first_text("./relativeFilepath")
    rate = int(first_text("./sampleRate"))
    channel_count = int(first_text("./numChannels"))
    length_value = float(first_text("./length"))
    return Sample(location, rate, channel_count, length_value)
Ejemplo n.º 3
0
    def go_fetch(kfold_n: int = 5):
        """Walk the ``Captures`` tree and group CAN log samples by vehicle.

        The working directory is moved two levels up from the current
        directory to locate ``Captures`` and is always restored before this
        function returns or exits.

        Args:
            kfold_n: Forwarded unchanged to every ``Sample`` as ``kfold_n``.

        Returns:
            Dict mapping ``(make, model, year)`` tuples to the list of
            ``Sample`` objects created for that vehicle.

        Raises:
            SystemExit: If no ``Captures`` folder exists two levels up.
        """
        # NOTE: File structure relative to this script is expected to be as follows.
        # .
        # +-- Captures
        # |   +-- Make x_0
        # |   |   +-- Model y_0
        # |   |   |   +-- ModelYear z_0
        # |   |   |   |   +-- Samples
        # |   |   |   |   |   +-- sample = re.match(r'loggerProgram\d+\.log', a_file_in_this_folder)
        # |   +-- Make x_1.... etc.
        # +-- Some folder
        # |   +-- The directory with these scripts
        # |   |   +-- this_script.py

        script_dir: str = getcwd()
        sample_dict = {}
        try:
            chdir("../../")
            if not path.exists("Captures"):
                # Make sure your local directory structure matches the example above. If not... adjust accordingly
                print("Error finding Captures folder. Please check the relative path between this script and Captures.")
                print("See the source of go_fetch() in FileBoi.py for an example of the expected relative paths.")
                raise SystemExit

            chdir("Captures")
            root_dir = getcwd()
            make: str = ""
            model: str = ""
            year: str = ""
            # Names of the non-leaf directories seen since the last leaf.
            current_vehicle = []
            for dirName, subdirList, fileList in walk(root_dir, topdown=True):
                this_dir = path.basename(dirName)
                if subdirList:
                    if this_dir == "Captures":
                        continue
                    current_vehicle.append(this_dir)
                    continue

                # Leaf directory: refresh whichever of make/model/year were
                # collected on the way down; the rest keep their last values.
                if len(current_vehicle) == 3:
                    make, model, year = current_vehicle
                elif len(current_vehicle) == 2:
                    model, year = current_vehicle
                elif len(current_vehicle) == 1:
                    year = current_vehicle[0]
                for file in fileList:
                    # Check if this file name matches the expected name for a CAN data sample. If so, create new Sample.
                    # Raw string with an escaped dot, so only a literal ".log" suffix matches.
                    m = re.match(r'loggerProgram\d+\.log', file)
                    if m:
                        vehicle_samples = sample_dict.setdefault((make, model, year), [])
                        this_sample_index = str(len(vehicle_samples))
                        this_sample = Sample(make=make, model=model, year=year, sample_index=this_sample_index,
                                             sample_path=dirName + "/" + m.group(0), kfold_n=kfold_n)
                        vehicle_samples.append(this_sample)
                current_vehicle = []
        finally:
            # Restore the caller's working directory on every exit path.
            chdir(script_dir)
        return sample_dict
Ejemplo n.º 4
0
    def new(self, params):
        """Create the sample, run ART on it and build the instance.

        Everything is written under ``params.out_dir``: ``params.json``,
        ``sample.fasta``, the ART outputs (prefix ``art``) and the three
        instance files. The ART executable path is read from the
        ``ART_ILLUMINA`` environment variable.

        Args:
            params: Object providing ``out_dir``, ``seed``, ``art_flags``,
                ``take_ref`` and ``to_json()``.

        Returns:
            ``self``, with ``params``, ``sample``, ``art_output`` and
            ``instance`` set.
        """
        out_dir = params.out_dir
        os.makedirs(out_dir, exist_ok=True)

        self.params = params
        params_text = w.pretty_json(params.to_json())
        w.write(out_dir + "/params.json", params_text)

        self.sample = Sample().new(params)
        sample_file = out_dir + "/sample.fasta"
        w.write(sample_file, w.fasta(self.sample))

        art_prefix = out_dir + "/art"
        art = os.environ['ART_ILLUMINA']
        base_command = [art, "--in", sample_file, "--out", art_prefix]
        seed_option = ["--rndSeed", str(params.seed)]
        # Standard output of the tool is discarded; its files are read below.
        subprocess.run(base_command + seed_option + params.art_flags,
                       stdout=subprocess.DEVNULL)
        aln_reader = r.aln(params.take_ref)
        self.art_output = r.read(art_prefix + ".aln", aln_reader)

        self.instance = Instance().new(params, self.art_output)
        instance_base = out_dir + "/instance"
        w.write(instance_base + ".json", w.json(self.instance.to_json()))
        w.write(instance_base + ".txt", w.text(self.instance.to_text()))
        w.write(instance_base + ".stats.json", w.json(self.instance.stats()))

        return self
Ejemplo n.º 5
0
def create_Samples_from_header(line, sample_list):
    """Create a ``Sample`` for each requested sample name in a header line.

    The header is split on ``'|'``. For every name in ``sample_list`` the
    positions of its ``.replicate1_Genotype``, ``.replicate1_Depth`` and
    ``.replicate1_Var/Depth`` columns are looked up and passed to ``Sample``.

    Args:
        line: Header line of the annotation file.
        sample_list: Sample names to look up.

    Returns:
        List of ``Sample`` objects, in ``sample_list`` order.

    Raises:
        SystemExit: Via ``quit()`` after printing an error, when any of the
            three columns of a requested sample is missing.
    """
    geno_sufix = '.replicate1_Genotype'
    depth_sufix = '.replicate1_Depth'
    ratio_sufix = '.replicate1_Var/Depth'

    columns = line.split('|')
    sample_object_list = []
    for sample_name in sample_list:
        # Only the column lookups are guarded, so a ValueError raised by the
        # Sample constructor is not misreported as a missing column.
        try:
            genotype_col = columns.index(sample_name + geno_sufix)
            depth_col = columns.index(sample_name + depth_sufix)
            ratio_col = columns.index(sample_name + ratio_sufix)
        except ValueError:
            # Function-call form of print works on both Python 2 and 3; the
            # one-element tuple keeps a tuple argument from being unpacked.
            print("ERROR: Specified samples: %s couldn't be found in input annotation file." % (sample_list,))
            quit()

        sample_object_list.append(
            Sample(sample_name, genotype_col, depth_col, ratio_col))

    return sample_object_list
Ejemplo n.º 6
0
def create_Samples_from_header(line):
    """Create a ``Sample`` for every sample found in a header line.

    The header is split on ``'|'``. Every column ending in
    ``.replicate1_Genotype`` names a sample; for each one the positions of
    its genotype, depth and var/depth (ratio) columns are passed to
    ``Sample`` and a fresh ``VariantCounter`` is attached.

    Args:
        line: Header line of the annotation file.

    Returns:
        List of ``Sample`` objects in header order.

    Raises:
        ValueError: From ``list.index`` when a sample's genotype, depth or
            ratio column cannot be found by exact name.
    """
    geno_sufix = '.replicate1_Genotype'
    depth_sufix = '.replicate1_Depth'
    ratio_sufix = '.replicate1_Var/Depth'

    columns = line.split('|')

    # Raw string: '\.' in a plain literal is an invalid escape sequence.
    # The suffix is escaped from the constant so the two cannot drift apart.
    genotype_pattern = r'(.*)' + re.escape(geno_sufix) + r'$'
    samples_name_list = []
    for column in columns:
        res = re.search(genotype_pattern, column)
        if res:
            samples_name_list.append(res.group(1))

    sample_return_list = []
    for sample_name in samples_name_list:
        genotype_col = columns.index(sample_name + geno_sufix)
        depth_col = columns.index(sample_name + depth_sufix)
        ratio_col = columns.index(sample_name + ratio_sufix)

        new_sample = Sample(sample_name, genotype_col, depth_col, ratio_col)

        # We create and set a VariantCounter object to the Sample.
        new_sample.set_variant_counter(VariantCounter())
        sample_return_list.append(new_sample)

    return sample_return_list
Ejemplo n.º 7
0
def main(eval_name, ml):
    """Classify every follower of ``eval_name`` and dump the grouping as JSON.

    Followers are fetched through ``UserController``; each valid ``User`` is
    written out, then every file in ``data/followers_<eval_name>`` is loaded
    as a ``Sample`` and classified with ``ml.predict``. Follower names are
    grouped under ``decodeType(prediction)`` and saved to
    ``evaluated/follower_types_<eval_name>.json``.

    Args:
        eval_name: Account whose followers are evaluated.
        ml: Object whose ``predict(data)`` result is accepted by
            ``decodeType``.
    """
    uc = UserController()
    followers = uc.getFollowers(eval_name)
    directory = "followers_{}".format(eval_name)
    full_directory = "data/{}".format(directory)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(full_directory, exist_ok=True)

    for name in followers:
        # The template has a single placeholder, so only the directory is passed.
        user = User(name, "{}/unknown".format(directory))
        if user.valid:
            user.writeFile()

    follower_files = [
        f for f in listdir(full_directory) if isfile(join(full_directory, f))
    ]

    # Pre-create an empty bucket for each of the 16 four-bit combinations.
    result = {}
    for bit_a in range(2):
        for bit_b in range(2):
            for bit_c in range(2):
                for bit_d in range(2):
                    result[decodeType([bit_a, bit_b, bit_c, bit_d])] = []

    for file_name in follower_files:
        follower = Sample("{}/{}".format(full_directory, file_name))
        predicted = ml.predict(follower.data)
        result[decodeType(predicted)].append(follower.twitName)

    # Make sure the output folder exists before opening the file for writing.
    os.makedirs("evaluated", exist_ok=True)
    with open("evaluated/follower_types_{}.json".format(eval_name), "w") as f:
        json.dump(result, f)
 def get_data(self):
     """Load the audio clips of every genre and build the label arrays.

     For each genre, 20 random indices are drawn and recorded in
     ``self.excluded``; then ``self.n_samples`` clips are loaded through
     ``Sample``, their ``final_data`` is appended to ``self.audio_data`` and
     the genre index is appended to ``self.labels`` once per
     ``sample.image_number``. Afterwards the excluded indices are saved with
     ``np.save`` and the collected lists are converted to arrays, with a
     one-hot label matrix stored in ``self.labels_onehot``.
     """
     print('Beggining audio file reading...')
     genre_count = len(self.genres)
     for genre_idx in range(genre_count):
         genre_name = self.genres[genre_idx]
         genre_path = self.main_path + '\\' + genre_name
         drop_indices = sorted(random.sample(range(1,100),20))
         self.excluded.append(drop_indices)
         # NOTE(review): sample_list is built but never read below; the
         # loading loop iterates range(self.n_samples) instead — confirm.
         dropped = set(drop_indices)
         sample_list = [idx for idx in range(100) if idx not in dropped]
         for clip_idx in range(self.n_samples):
             clip_suffix = '.%05d' % clip_idx
             song_path = genre_path + '\\' + genre_name + clip_suffix + '.au'
             sample = Sample(song_path)
             self.audio_data.extend(sample.final_data)
             # One label per image produced from this clip.
             self.labels.extend([genre_idx] * sample.image_number)
             if clip_idx % 10 == 0:
                 print('Percentage Complete :' ,
                     int(((genre_idx*100)+clip_idx)/(genre_count*100)*100) , '%')
     print('Audio read succesfully')
     print('Saving droped indices')
     self.excluded = np.array(self.excluded)
     print(self.excluded)
     try:
         np.save('C:\\Users\\nick\\Desktop\\Final_Project\\excluded_indices',
             self.excluded)
     except Exception:
         # Saving is best-effort; a failure is only reported.
         print('Write Failed')
     self.audio_data = np.array(self.audio_data)
     self.labels = np.array(self.labels)
     genre_ids = np.arange(len(self.genres))
     self.labels_onehot = (genre_ids == self.labels[: , None]).astype(int)
Ejemplo n.º 9
0
def filterData(reconData, logDir, fileEnding, samplingInterval, coefficients):
    """Convolve the x/y/z series with ``coefficients`` and export the result.

    The first ``len(coefficients) - 1`` entries of the time axis and of each
    convolved series are dropped, one ``Sample`` is built per remaining time
    stamp, the last 200 samples are discarded and the rest is written to
    ``<logDir>FilteredData_<fileEnding>.txt`` through ``exportData``.

    Args:
        reconData: Data accepted by ``splitData``.
        logDir: Prefix of the output file path.
        fileEnding: Tag inserted into the output file name.
        samplingInterval: Not used by this function.
        coefficients: Filter coefficients.

    Returns:
        A one-element list holding the list of filtered samples.
    """
    time, x, y, z = splitData(reconData)
    xFiltered, yFiltered, zFiltered = [
        scipy.signal.convolve(coefficients, series) for series in (x, y, z)
    ]

    numberToDrop = len(coefficients) - 1
    time = time[numberToDrop:]
    xFiltered, yFiltered, zFiltered = [
        series[numberToDrop:] for series in (xFiltered, yFiltered, zFiltered)
    ]

    filteredData = []
    for idx in range(len(time)):
        sample = Sample()
        sample.time = time[idx]
        sample.position.x = xFiltered[idx]
        sample.position.y = yFiltered[idx]
        sample.position.z = zFiltered[idx]
        filteredData.append(sample)

    # The trailing 200 samples are always discarded.
    filteredData = filteredData[:-200]
    outputPath = logDir + "FilteredData_" + fileEnding + ".txt"
    exportData(outputPath, filteredData)

    return [filteredData]
Ejemplo n.º 10
0
def getDSDFilelist(xml_path):
    """Build train/test sample lists from a dataset description XML file.

    For every ``track`` element the ``Voice`` and ``Mix`` instruments are
    turned into samples, an accompaniment file is produced with
    ``subtract_audio`` and wrapped in a ``Sample`` that reuses the vocals'
    sample rate, channel count and duration.

    Args:
        xml_path: Path of the XML file to parse.

    Returns:
        Two lists, ``[train_mixes, train_accs, train_vocals]`` and
        ``[test_mixes, test_accs, test_vocals]``. Tracks whose
        ``databaseSplit`` text is not ``"Training"`` go to the test lists.
    """
    root = etree.parse(xml_path).getroot()
    db_path = root.find("./databaseFolderPath").text

    # Each split holds three parallel lists in the order: mixes, accs, vocals.
    training = [[], [], []]
    testing = [[], [], []]

    for track in root.findall(".//track"):
        # Get mix and vocal instruments
        voice_node = track.xpath(".//instrument[instrumentName='Voice']")[0]
        vocals = create_sample(db_path, voice_node)
        mix_node = track.xpath(".//instrument[instrumentName='Mix']")[0]
        mix = create_sample(db_path, mix_node)
        [acc_path] = subtract_audio([mix.path], [vocals.path])
        # Accompaniment has same signal properties as vocals and mix
        acc = Sample(acc_path, vocals.sample_rate, vocals.channels, vocals.duration)

        is_training = track.xpath("./databaseSplit")[0].text == "Training"
        destination = training if is_training else testing
        for bucket, entry in zip(destination, (mix, acc, vocals)):
            bucket.append(entry)

    return training, testing
Ejemplo n.º 11
0
 def __init__(self, Actor, Critic, env, lr=0.001, n_actions=1,
              model_file=None):
     """Create the trainable critic, both actors, their networks and a sampler.

     Args:
         Actor: Callable used to build the trainable and the frozen actor.
         Critic: Callable used to build the trainable critic.
         env: Environment exposing ``action_space.high`` and
             ``observation_space.shape``.
         lr: Stored as ``self.lr``.
         n_actions: Number of actions passed to both actors.
         model_file: Not used by this constructor.
     """
     self.env = env
     self.lr = lr
     self.actor = Actor
     self.critic = Critic
     self.critic_train = Critic(trainable=True)

     upper = env.action_space.high
     self.action_bound = [-upper, upper]
     self.n_features = env.observation_space.shape[0]
     self.n_actions = n_actions

     # Both actors are built from the same positional arguments and differ
     # only in the trainable flag.
     actor_args = (env, n_actions, self.action_bound)
     self.actor_train = Actor(*actor_args, trainable=True)
     self.actor_old = Actor(*actor_args, trainable=False)

     feature_count = self.n_features
     self.actor_net = self.actor_train.build_net(input_shape=feature_count)
     self.critic_net = self.critic_train.build_net(input_shape=feature_count)
     self.actor_oldnet = self.actor_old.build_net(input_shape=feature_count)
     # self.pi = None
     # self.pi_old = None
     self.Sample = Sample(env, self.actor_train, self.critic_train)
Ejemplo n.º 12
0
def setup_samples(parent_dir):
    """Create one ``Sample`` per immediate subdirectory of ``parent_dir``.

    Args:
        parent_dir: Directory whose subdirectories each hold one sample.

    Returns:
        List of ``Sample`` objects, each built from the absolute path of a
        subdirectory and the absolute path of ``parent_dir``.
    """
    # Function-call form of print works on both Python 2 and Python 3.
    print("Reading directory %s ..." % (parent_dir,))

    parent_abs = os.path.abspath(parent_dir)

    # Container to keep sample objects
    samples = []
    for entry in os.listdir(parent_dir):
        sample_dir = os.path.join(parent_dir, entry)
        if not os.path.isdir(sample_dir):
            continue
        # sample_dir already includes parent_dir; joining parent_dir onto it
        # again would double the prefix for a relative parent_dir.
        samples.append(Sample(os.path.abspath(sample_dir), parent_abs))

    # Return all samples
    return samples
Ejemplo n.º 13
0
def register_prediction_feedback():
    """Store user feedback about a prediction as a labeled sample.

    Reads ``sample_text``, ``sample_prediction`` and ``correct`` from the
    query string. The label is 1 when the prediction was above 0.5 and
    marked correct, or at most 0.5 and marked incorrect; otherwise 0. The
    labeled sample is pushed with ``push_sample_to_firebase``.

    Returns:
        ``jsonify(success=True)`` once the sample has been pushed, or
        ``jsonify(success=False)`` when the prediction score is missing or
        not a number, or when the ``Sample`` cannot be built.
    """
    sample_text = request.args.get("sample_text", None, type=str)
    sample_prediction = request.args.get("sample_prediction",
                                         None,
                                         type=float)
    # Parsed as text on purpose: bool("false") is True, so type=bool treats
    # every non-empty value as "correct". A missing flag stays falsy.
    correct_raw = request.args.get("correct", None, type=str)
    correct = (correct_raw is not None and correct_raw.strip().lower() in
               ("1", "true", "t", "yes", "y", "on"))

    if sample_prediction is None:
        # Without a score the threshold comparison below cannot be made.
        return jsonify(success=False)

    try:
        sample = Sample(sample_text, prediction_score=sample_prediction)
    except Exception:
        return jsonify(success=False)

    predicted_positive = sample_prediction > 0.5
    # Correct feedback keeps the predicted class; incorrect feedback flips it.
    sample.label = 1 if predicted_positive == correct else 0
    sample.labeled = True
    push_sample_to_firebase(sample)
    # train_single(sample.text, sample.label)
    return jsonify(success=True)
Ejemplo n.º 14
0
def home():
    """Render the home page, adding a scored sample on POST requests.

    The template context is a ``defaultdict`` that yields ``""`` for missing
    keys. On POST, the submitted ``text`` form field is scored with
    ``predict`` and stored under ``"sample"``.
    """
    context = defaultdict(lambda: "")
    if request.method != "POST":
        return render_template("home.html", context=context)

    submitted_text = request.form["text"]
    score = predict(request.form["text"])
    context["sample"] = Sample(submitted_text, prediction_score=score)
    return render_template("home.html", context=context)
Ejemplo n.º 15
0
def parse_binary(path) -> list:
    """Read every record of a little-endian binary log into ``Sample`` objects.

    The file starts with one 8-byte signed integer, followed by records made
    of a signed 64-bit time value, three doubles (position) and nine doubles
    (a 3x3 matrix stored row by row). Records are read until end of file; a
    file holding only the header yields an empty list.

    Args:
        path: File path; the first four characters of its last
            ``'/'``-separated component become the sample name.

    Returns:
        List of ``Sample(name, time, pos, mat)`` in file order.

    Raises:
        struct.error: If the header or a record is truncated.
    """
    name = path.split('/')[-1][0:4]
    collection = []
    record_format = '<q12d'
    # One int64 plus twelve float64; '<' means standard sizes, no padding.
    record_size = 8 * 13

    with open(path, 'rb') as file:
        # The header value is parsed only to validate its presence.
        # NOTE(review): it looks like a record count, but reading has always
        # continued until end of file instead of using it — confirm.
        struct.unpack('<q', file.read(8))

        while True:
            chunk = file.read(record_size)
            if not chunk:
                return collection
            # A short (truncated) chunk makes unpack raise struct.error.
            fields = struct.unpack(record_format, chunk)
            time = fields[0]
            pos = np.array(fields[1:4], dtype=float)
            mat = np.array(fields[4:], dtype=float).reshape(3, 3)
            collection.append(Sample(name, time, pos, mat))
Ejemplo n.º 16
0
def vanadiumPlate(width, height, thickness):
    '''Return a Sample named 'vanadium' shaped as a plate.

    The shape comes from shapes.plate(width, height, thickness).
    All inputs must have units attached.
    '''
    return Sample(name='vanadium',
                  shape=shapes.plate(width, height, thickness))
Ejemplo n.º 17
0
 def _extract_sample(file_name):
     '''
     Extract wavfile properties to feed into the Sample class.

     The file is loaded with librosa at a 16000 Hz target rate and its
     duration is computed from the loaded signal.

     Returns Sample(file_name, rate, 1, duration).
     '''
     # sr and y are passed by keyword: recent librosa releases reject them
     # as positional arguments, and older releases accept the keywords too.
     y, rate = librosa.load(file_name, sr=16000)
     # NOTE(review): presumably 1 because librosa.load mixes down to mono by
     # default — confirm against the installed librosa version.
     num_channels = 1
     duration = float(librosa.get_duration(y=y, sr=rate))
     return Sample(file_name, rate, num_channels, duration)
Ejemplo n.º 18
0
def timeshift(dataSet, shift):
    """Return copies of the samples in ``dataSet`` with ``shift`` added to time.

    Each copy is made with ``Sample(sample)``; the input samples themselves
    are not reassigned.
    """

    def shifted_copy(original):
        # Build the copy first, then move its time stamp.
        duplicate = Sample(original)
        duplicate.time += shift
        return duplicate

    return [shifted_copy(entry) for entry in dataSet]
Ejemplo n.º 19
0
    def recordSample(self, env):
        """Copy the simulated car's current state from ``env`` into a new Sample.

        Position and heading are cloned with ``myclone()``; time, fractional
        index, speed and road index are copied as they are.
        """
        # Indentation uses spaces only: mixing tabs and spaces in one block
        # is rejected by Python 3 with a TabError.
        s = Sample()
        s.time = env.time
        s.simcarPos = env.simcar.p.myclone()
        s.simcarHeading = env.simcar.h.myclone()
        s.simcarFracIndex = env.simcar.fracIndex
        s.simcarSpeed = env.simcar.speed
        s.simcarRoadIndex = env.simcar.roadIndex
        # NOTE(review): the sample is neither stored nor returned in the
        # visible code — the method may be truncated here; confirm.
Ejemplo n.º 20
0
    def create_object(self, node_type):
        """
        Returns an empty object of the node_type provided. It must be a valid object,
        or else an exception is raised.

        The class for each node type is imported only when it is requested.

        Args:
            node_type (str): The type of object desired.

        Returns:
            A object of the specified type.

        Raises:
            ValueError: If node_type is not one of the supported types.
        """
        self.logger.debug("In create_object. Type: %s", node_type)

        node = None
        if node_type == "project":
            from Project import Project
            self.logger.debug("Creating a Project.")
            node = Project()
        elif node_type == "visit":
            from Visit import Visit
            self.logger.debug("Creating a Visit.")
            node = Visit()
        elif node_type == "subject":
            from Subject import Subject
            self.logger.debug("Creating a Subject.")
            node = Subject()
        elif node_type == "sample":
            from Sample import Sample
            self.logger.debug("Creating a Sample.")
            node = Sample()
        elif node_type == "study":
            from Study import Study
            self.logger.debug("Creating a Study.")
            node = Study()
        elif node_type == "wgs_dna_prep":
            from WgsDnaPrep import WgsDnaPrep
            self.logger.debug("Creating a WgsDnaPrep.")
            node = WgsDnaPrep()
        elif node_type == "16s_dna_prep":
            from SixteenSDnaPrep import SixteenSDnaPrep
            self.logger.debug("Creating a SixteenSDnaPrep.")
            node = SixteenSDnaPrep()
        elif node_type == "16s_raw_seq_set":
            from SixteenSRawSeqSet import SixteenSRawSeqSet
            self.logger.debug("Creating a SixteenSRawSeqSet.")
            node = SixteenSRawSeqSet()
        elif node_type == "wgs_raw_seq_set":
            from WgsRawSeqSet import WgsRawSeqSet
            self.logger.debug("Creating a WgsRawSeqSet.")
            node = WgsRawSeqSet()
        elif node_type == "16s_trimmed_seq_set":
            from SixteenSTrimmedSeqSet import SixteenSTrimmedSeqSet
            self.logger.debug("Creating a SixteenSTrimmedSeqSet.")
            node = SixteenSTrimmedSeqSet()
        else:
            # "%s" is required: a bare "%" is an incomplete format and hides
            # the offending node type from the error message.
            raise ValueError("Invalid node type specified: %s" % node_type)

        return node
Ejemplo n.º 21
0
    def calcRelativeSnapSample(self, currentSample, lastEstimatedSample):
        """Return a sample moved part of the way from the last estimate.

        The offset between the current and the last estimated position is
        scaled per axis by ``self.snapLimit`` and added back to the last
        estimated position; the result keeps the current sample's time.
        """
        offset = currentSample.position - lastEstimatedSample.position
        for axis in ("x", "y", "z"):
            setattr(offset, axis, getattr(offset, axis) * self.snapLimit)
        snapPosition = lastEstimatedSample.position + offset

        return Sample(currentSample.time, snapPosition)
Ejemplo n.º 22
0
Archivo: Num.py Proyecto: shants/fss18
 def __init__(self, m=0):
     """Initialise the counters, the lo/hi bounds and the backing Sample.

     ``lo`` starts at 10**32 and ``hi`` at -10**32 (both as floats); ``m``
     is passed to ``Sample``.
     """
     extreme = math.pow(10, 32)
     self.n = self.mu = self.m2 = self.sd = 0
     self.lo = extreme
     self.hi = -1 * extreme
     self._some = Sample(m)
     self.w = 1
Ejemplo n.º 23
0
 def __init__(self, mx):
     """Initialise the counters, the lo/hi bounds and the backing Sample.

     ``lo`` starts at 10**32 and ``hi`` at -10**32; ``mx`` is passed to
     ``Sample``.
     """
     self.n = 0
     self.mu = 0
     self.m2 = 0
     self.sd = 0
     self.lo = pow(10, 32)
     # The sign goes outside pow(): (-10) ** 32 is positive because the
     # exponent is even, which made hi start at the same value as lo.
     self.hi = -pow(10, 32)
     self._some = Sample(mx)
     self.w = 1
Ejemplo n.º 24
0
 def addSample(self, file):
     """Build a Sample from ``file`` and load its parameters.

     Returns the sample, or None after printing a message when building it
     or calling ``getParameters()`` raises.
     """
     try:
         loaded = Sample(file)
         loaded.getParameters()
     except Exception as err:
         print(f"Import Error. {file} not added to project" , err)
         return None
     else:
         return loaded
Ejemplo n.º 25
0
    def headingError(self, s=None):
        """Return the absolute angle between a reference heading and the car's.

        Args:
            s: Sample providing ``simcarRoadIndex`` and ``simcarHeading``.
                A fresh ``Sample()`` is created per call when omitted, rather
                than one instance shared by every call.

        Returns:
            ``abs(rotAngle(rh.x, rh.z) - rotAngle(heading.x, heading.z))``
            where ``rh`` is a normalized default ``Position``.
        """
        if s is None:
            s = Sample()
        # Both lookups go through self.env; a bare `env` is not defined in
        # this method.
        s2 = self.env.road.getSegment(s.simcarRoadIndex)
        s1 = self.env.road.getSegment(s.simcarRoadIndex - 1)
        # NOTE(review): s1 and s2 are fetched but never used; rh is built
        # from a default Position, so a road-direction computation from the
        # two segments may be missing here — confirm.
        rh = Position().normalize()

        return abs(rotAngle(rh.x, rh.z) - rotAngle(s.simcarHeading.x, s.simcarHeading.z))
Ejemplo n.º 26
0
 def read(self, file_name):
     """Read NUM_POINTS integer lines plus one metadata line into a Sample.

     The file is located at ``self.path + self.delim + file_name``. Its
     first NUM_POINTS lines are parsed with ``int`` into a float32 array;
     the following line is handed to ``self.parseMetadata`` with the file
     name.
     """
     file_path = self.path + self.delim + file_name
     values = np.empty(NUM_POINTS, dtype=np.float32)
     with open(file_path) as handle:
         for slot in range(NUM_POINTS):
             values[slot] = int(handle.readline())
         metadata_line = handle.readline()
         metadata = self.parseMetadata(file_name, metadata_line)
     return Sample(values, metadata)
Ejemplo n.º 27
0
    def __init__(self, size, env):
        """Create generation 1 with ``size`` samples at the start point.

        Args:
            size: Number of samples in the population.
            env: Environment exposing ``startPoint``.
        """
        self.popSize = size
        self.env = env
        self.numStepsInCurrentOptimumPath = 1000

        # Create Generation 1 Samples
        self.generation = 1
        self.samples = [
            Sample(self.env.startPoint) for _ in range(self.popSize)
        ]
Ejemplo n.º 28
0
 def loadSamples(self, lineSplit):
     """Create a Sample from the first field and add the others as lipids.

     The first item of ``lineSplit`` is passed to ``Sample`` and the new
     sample is appended to ``self.Samples``. Every later item is converted
     to float and added with ``addLipid`` under
     ``self.lipidIndexMap[position - 1]``.
     """
     for position, field in enumerate(lineSplit):
         if position == 0:
             S = Sample(field)
             self.Samples.append(S)
             #print "Creating " + S.toStr()
             continue
         S.addLipid(self.lipidIndexMap[position - 1], float(field))
Ejemplo n.º 29
0
def run(file_path):
    """Dissect, parse, extract and print info for one file or a directory.

    When ``file_path`` is a directory, every entry returned by
    ``os.listdir`` is processed; otherwise ``file_path`` itself is
    processed.
    """
    dissector = Dissector()
    parser = Parser()
    extractor = Extractor()

    if os.path.isdir(file_path):
        targets = [
            os.path.join(file_path, entry) for entry in os.listdir(file_path)
        ]
    else:
        targets = [file_path]

    # The same four steps are applied to every target, in this order.
    for target in targets:
        sample = Sample(target)
        dissector.extract_file(sample)
        parser.parse(sample)
        extractor.extract(sample)
        sample.print_info()
Ejemplo n.º 30
0
    def makeSamples(self):
        """Append one Sample per pixel: its flattened neighbourhood and decision.

        For every pixel of ``self.orginalImage`` a square window of side
        ``2 * self.maskSize + 1`` centred on the pixel is flattened and
        paired with ``self.resultImage[i, j]``. Parts of the window that
        fall outside the image are filled with zeros.
        """
        height, width = self.orginalImage.shape
        window = 2 * self.maskSize + 1
        # Zero-pad once so every window has full size and stays aligned with
        # its centre pixel. Slicing the raw image with a negative start
        # wraps around and gives empty or shifted windows at the borders.
        padded = np.pad(self.orginalImage,
                        self.maskSize,
                        mode='constant',
                        constant_values=0)
        for i in range(height):
            for j in range(width):
                # Pixel (i, j) of the image sits at (i + maskSize,
                # j + maskSize) in the padded image, so its window starts
                # at (i, j).
                data = padded[i:i + window, j:j + window].flatten()
                decision = self.resultImage[i, j]
                self.samples.append(Sample(data, decision))