def conv(self, conv, weight, bias=None, **kargs):
    """Apply ``conv`` to the head, beta and errors components of ``self``.

    * ``self.head`` is passed through ``conv`` with ``weight`` and ``bias``.
    * ``self.beta`` (when not ``None``) is passed through ``conv`` with
      ``weight.abs()`` and no bias.
    * ``self.errors`` (when not ``None``) has its two leading dimensions
      merged into one before ``conv`` is applied with ``weight`` and no
      bias; the result is then split back into those two leading dimensions.

    Args:
        conv: callable invoked as ``conv(tensor, weight, bias=..., **kargs)``.
        weight: weight handed to every ``conv`` call (absolute value for beta).
        bias: bias used for the head only.
        **kargs: extra keyword arguments forwarded unchanged to ``conv``.

    Returns:
        The result of ``self.new(new_head, new_beta, new_errors)``.
    """
    errors = self.errors

    # Merge the two leading dimensions so conv receives a single leading one.
    if errors is None:
        flat_errors = None
    else:
        flat_errors = errors.view(-1, *errors.size()[2:])

    new_head = conv(self.head, weight, bias=bias, **kargs)

    if errors is None:
        conv_errors = None
    else:
        conv_errors = conv(flat_errors, weight, bias=None, **kargs)

    if self.beta is None:
        new_beta = None
    else:
        new_beta = conv(self.beta, weight.abs(), bias=None, **kargs)

    if errors is None:
        new_errors = None
    else:
        # Restore the two leading dimensions that were merged above.
        lead0, lead1 = errors.size()[0], errors.size()[1]
        new_errors = conv_errors.view(lead0, lead1, *conv_errors.size()[1:])

    return self.new(new_head, new_beta, new_errors)
def bmm(self, other):
    """Batch-matrix-multiply every component of ``self`` by ``other``.

    ``self.head`` gets a singleton dimension inserted at position 1, is
    multiplied with ``other`` via ``bmm`` and has that dimension squeezed
    away again. ``self.beta`` (when not ``None``) is treated the same way
    but against ``other.abs()``. ``self.errors`` (when not ``None``) is
    multiplied against a copy of ``other`` expanded along a new leading
    dimension of size ``self.errors.size()[0]``.

    Args:
        other: tensor used as the right-hand operand of each ``bmm``.

    Returns:
        The result of ``self.new(new_head, new_beta, new_errors)``.
    """
    new_head = self.head.unsqueeze(1).bmm(other).squeeze(1)

    if self.beta is None:
        new_beta = None
    else:
        new_beta = self.beta.unsqueeze(1).bmm(other.abs()).squeeze(1)

    if self.errors is None:
        return self.new(new_head, new_beta, None)

    # Repeat `other` once per entry along the leading dimension of errors.
    tiled = other.expand(self.errors.size()[0], -1, -1, -1)
    errors = self.errors

    # Merge the two leading dimensions on both operands so a single bmm
    # call handles every (entry, batch) pair.
    rows = errors.view(-1, *errors.size()[2:]).unsqueeze(1)
    mats = tiled.contiguous().view(-1, *tiled.size()[2:])
    product = rows.bmm(mats)

    # Restore the leading dimensions of errors; the last one is inferred.
    new_errors = product.view(*errors.size()[:-1], -1)
    return self.new(new_head, new_beta, new_errors)
def train_batch(self, inputs, targets, weights=None, update=True, logname="train"):
    """Run one batch through the model and, optionally, one optimizer step.

    Args:
        inputs: batch handed to ``self.set_inputs``.
        targets: targets handed to ``self.compute_loss``.
        weights: optional weights; when given, ``self.cuoutput`` is replaced
            by ``self.weighted(self.cuoutput, self.cuweights)`` and the
            weights are also forwarded to ``self.compute_loss``.
        update: when truthy, training mode is enabled, gradients are zeroed
            before the forward pass, and ``backward()`` plus
            ``optimizer.step()`` run after the loss is computed. When falsy,
            training mode is disabled and no parameter update happens.
        logname: name under which the loss is recorded through ``add_log``.

    Returns:
        Tuple ``(self.get_outputs(), ploss)`` where ``ploss`` is the first
        element of ``dlh.novar(culoss)``.
    """
    do_update = bool(update)

    self.set_training(do_update)
    if do_update:
        self.optimizer.zero_grad()

    self.set_inputs(inputs)
    self.forward()

    if weights is not None:
        # A 1x1 CUDA placeholder is created first and then handed to
        # dlh.assign together with the weights (presumably to be resized and
        # filled -- confirm against dlh).
        placeholder = torch.randn(1, 1).cuda()
        self.cuweights = autograd.Variable(placeholder)
        dlh.assign(self.cuweights, weights, False)
        self.cuoutput = self.weighted(self.cuoutput, self.cuweights)

    culoss = self.compute_loss(targets, weights=weights)

    if do_update:
        culoss.backward()
        self.optimizer.step()

    ploss = dlh.novar(culoss)[0]

    # Running count of samples seen, using the size of dimension 0 of inputs.
    self.ntrain += dlh.size(inputs, 0)
    add_log(self.log, logname, loss=ploss, ntrain=self.ntrain, lr=self.current_lr)

    outputs = self.get_outputs()
    return outputs, ploss
def decompress(file_name):
    """Decompress ``file_name`` using the frequency table stored in its header.

    The input is read as text made of '0'/'1' characters (every chunk read
    is parsed with ``int(chunk, 2)``). Layout as consumed by this function:

    * one BYTE_SIZE field holding the number of header entries minus one;
    * that many entries of ``4*BYTE_SIZE`` frequency bits followed by
      ``BYTE_SIZE`` value bits;
    * the encoded payload, consumed ``READ_IN_SIZE`` characters at a time;
    * a trailing BYTE_SIZE field holding the padding amount.

    Progress messages are printed to standard output.

    Args:
        file_name: path handed to ``helpers.start_decompress``.

    Returns:
        Whatever ``helpers.end_decompress(file_in, file_out)`` returns.
    """
    (file_in, file_out) = helpers.start_decompress(file_name, ALG_NAME)

    # Remember where the data begins so we can come back after peeking at
    # the end of the file.
    p = file_in.tell()

    # Jump to the last byte, which stores the amount of padding.
    # NOTE(review): the offset is a hard-coded 8 while the read below uses
    # BYTE_SIZE -- presumably BYTE_SIZE == 8; confirm.
    file_in.seek(-8, 2)
    pad = file_in.read(BYTE_SIZE)
    padding = int(pad, 2)

    # Back to the start of the data.
    file_in.seek(p, 0)

    # The first field holds the number of header entries minus one.
    i = file_in.read(BYTE_SIZE)
    header_left = int(i, 2) + 1
    header_left_original = header_left

    # print(...) with a single string behaves identically on Python 2 and 3.
    print("Constructing huffman tree ....")

    # Rebuild the frequency list from the header.
    freq_list = []
    while header_left > 0:
        freq = int(file_in.read(4 * BYTE_SIZE), 2)
        val = int(file_in.read(BYTE_SIZE), 2)
        # A value-dependent fraction is added to the frequency (presumably
        # so ties are broken the same way as during compression -- confirm).
        freq = freq + val / 1000.
        header_left -= 1
        freq_list.append((freq, val))

    # Dictionary of codes for the given byte values.
    codes = _add_codes(_build_tree(freq_list), {}, '')
    print("Done!")
    print("Writing file ...")

    # Flip the dictionary so codes map back to byte values.
    inv_codes = helpers.inverse_dict(codes)

    i = file_in.read(READ_IN_SIZE)
    code = i
    counter = 0

    # Number of payload reads: total size minus header, padding and the
    # data that precedes position p.
    stop = (helpers.size(file_in.name) - ((2 * BYTE_SIZE) + padding + p +
            (header_left_original * 5 * BYTE_SIZE)))

    while counter < stop:
        i = file_in.read(READ_IN_SIZE)
        counter += 1
        if code in inv_codes:
            # Complete code word: emit its byte and start the next word
            # with the chunk that was just read.
            file_out.write(helpers.to_bin(inv_codes[code], BYTE_SIZE))
            code = i
        else:
            # Not a code word yet: keep extending it.
            code = code + i

    return helpers.end_decompress(file_in, file_out)
def estimate(file_name) :
    """Estimate the compressed size of ``file_name`` from a frequency sample.

    A sampled frequency list is obtained from ``helpers.freq_list``. When
    the sample does not cover the whole file (sample ratio != 1) and fewer
    than 256 distinct values were seen, the frequencies are extrapolated to
    the full file size after giving unseen values a count of 1. Codes are
    then built from the frequency list, and the result is computed as
    ``2*BYTE_SIZE + 5 * len(freq_list) + total_bits / BYTE_SIZE``.

    Args:
        file_name: path of the file to estimate.

    Returns:
        The estimated size as a number (named ``total_bytes`` in the
        computation this replaces).
    """
    # Frequency list sample of the file.
    freq_list = helpers.freq_list(file_name, "sample")

    # The sample ratio depends only on the file, so look it up once instead
    # of once per loop iteration.
    sample_ratio = helpers.freq_list_sample_ratio(file_name)

    if sample_ratio != 1 and len(freq_list) < 256:
        sample_size = helpers.freq_list_sample_size(file_name)

        freq_dict = {}
        for (fr, c) in freq_list:
            freq_dict[c] = float(fr)

        total_missing = 0
        for i in range(256):
            # NOTE(review): the lookup uses chr(i) but the insert uses the
            # int i; kept as-is because the key type produced by
            # helpers.freq_list is not visible here -- confirm which is
            # intended.
            if chr(i) not in freq_dict:
                freq_dict[i] = 1
                total_missing += 1

        # Probability of each value within the sample plus pseudo-counts.
        denominator = sample_size + total_missing
        prob_dict = {}
        for f in freq_dict:
            prob_dict[f] = freq_dict[f] / denominator

        # Spread the part of the file that was not sampled over the values
        # according to those probabilities.
        to_simulate = helpers.size(file_name) - sample_size
        for f in prob_dict:
            freq_dict[f] += (to_simulate * prob_dict[f])

        freq_list = [(freq_dict[key], key) for key in freq_dict]

    # Build the code dictionary.
    codes = _add_codes(_build_tree(freq_list), {}, '')

    if sample_ratio != 1:
        freq_list = [(freq * sample_ratio, val) for (freq, val) in freq_list]

    # Total number of bits: frequency times code length for every value.
    # An explicit loop is kept so the floating-point accumulation order is
    # exactly left-to-right.
    total_bits = 0
    for (freq, val) in freq_list:
        total_bits += freq * len(codes[val])

    header_size = len(freq_list) * 5
    total_bytes = 2*BYTE_SIZE + header_size + total_bits / BYTE_SIZE
    return total_bytes
def decompress(file_name):
    """Decompress ``file_name`` by expanding its stored rule dictionary.

    The input is read as text made of '0'/'1' characters. It consists of a
    sequence of elements, each introduced by a one-character tag ('0' for an
    int, '1' for a string) followed by BYTE_SIZE-wide characters up to a
    "00000000" terminator. Elements read while the file position is within
    the dictionary section are grouped in threes into
    ``{key: (first, second)}`` entries; the elements after that form the
    content. The last ``5*BYTE_SIZE`` characters of the file hold the
    dictionary length.

    Args:
        file_name: path handed to ``helpers.start_decompress``.

    Returns:
        Whatever ``helpers.end_decompress(file_in, file_out)`` returns.

    Raises:
        TypeError: if an element tag is neither '0' nor '1'.
    """
    (file_in, file_out) = helpers.start_decompress(file_name, ALG_NAME)

    def read_element(tag):
        """Decode the element whose tag was just read; None if it is empty."""
        if tag != '0' and tag != '1':
            raise TypeError ("must start with 0 or 1")

        # Collect characters until the nul terminator or end of file.
        chars = []
        while True:
            chunk = file_in.read(BYTE_SIZE)
            if chunk == "00000000" or chunk == '':
                break
            chars.append(chr(int(chunk, 2)))
        acc = ''.join(chars)

        if acc == '\x00' or acc == '':
            return None
        # Tag '0' marks an int that was stored in its decimal string form.
        return int(acc) if tag == '0' else acc

    # Remember where the data starts, read the dictionary length from the
    # trailer, then come back. (An earlier padding-adjusted length was
    # computed and immediately overwritten by this value, so only this read
    # is kept.)
    p = file_in.tell()
    file_in.seek(-(BYTE_SIZE*5), 2)
    dict_size = int(file_in.read(BYTE_SIZE*5), 2)
    file_in.seek(p, 0)

    # Tag of the first dictionary element.
    tag = file_in.read(1)

    # Decode the dictionary: every three elements form one entry.
    dictionary = {}
    pending = []
    dict_end = dict_size + p
    while file_in.tell() <= dict_end:
        element = read_element(tag)
        if element is not None:
            pending.append(element)
        # Tag of the next element.
        tag = file_in.read(1)
        if len(pending) == 3:
            # First element is the key, the next two are the value tuple.
            dictionary[pending[0]] = (pending[1], pending[2])
            pending = []

    # Decode the content, stopping before the trailer. The input file's size
    # is not modified by this function, so the bound is computed once.
    content = []
    content_end = helpers.size(file_in.name) - (BYTE_SIZE*5)
    while file_in.tell() < content_end:
        element = read_element(tag)
        if element is not None:
            content.append(element)
        tag = file_in.read(1)

    # Replace rules with the original digrams to decompress.
    content = _decode(content, dictionary)

    # Write out the finished, decoded file.
    for item in content:
        if len(item) == 1:
            file_out.write(helpers.to_bin(ord(item), BYTE_SIZE))
        else:
            for ch in item:
                file_out.write(helpers.to_bin(ord(ch), BYTE_SIZE))

    return helpers.end_decompress(file_in, file_out)
def estimate(file_name) :
    """Return ``helpers.size(file_name)`` as the estimate for this algorithm.

    Args:
        file_name: path of the file to estimate.

    Returns:
        The value reported by ``helpers.size`` for ``file_name``, unchanged.
    """
    estimated_size = helpers.size(file_name)
    return estimated_size