def h(sample):
    inputs = Variable(cast(sample[0], opt.dtype))
    targets = Variable(cast(sample[1], 'long'))
    y = data_parallel(f, inputs, params, stats, sample[2], np.arange(opt.ngpu))
    return F.cross_entropy(y, targets), y
def h(sample):
    global _outputs, _loss
    # 6x6 connectivity: each of net1's three heads is mutually supervised by
    # each of net2's three heads, and vice versa
    connection_map = np.array([[0, 0, 0, 1, 1, 1],
                               [0, 0, 0, 1, 1, 1],
                               [0, 0, 0, 1, 1, 1],
                               [1, 1, 1, 0, 0, 0],
                               [1, 1, 1, 0, 0, 0],
                               [1, 1, 1, 0, 0, 0]])
    inputs = cast(sample[0], opt.dtype)
    targets = cast(sample[1], 'long')
    net1_outputs = data_parallel(f_1, inputs, params_1, sample[2], list(range(opt.ngpu)))
    net2_outputs = model_2(inputs)
    net1_outputs = [o.float() for o in net1_outputs]
    net2_outputs = [o.float() for o in net2_outputs]
    _loss = []
    # hard supervision: cross-entropy of every head against the labels
    for o in net1_outputs:
        _loss.append(F.cross_entropy(o, targets))
    for o in net2_outputs:
        _loss.append(F.cross_entropy(o, targets))
    outputs = net1_outputs + net2_outputs
    # soft supervision: KL between connected heads (teacher side detached)
    for i, o in enumerate(outputs):
        for j, o2 in enumerate(outputs):
            if connection_map[i, j] > 0:
                _loss.append(KL_divergence(o2.detach(), o))
    loss = sum(_loss)
    _outputs = net2_outputs[-1].detach()
    return loss, net1_outputs[-1]
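# Note: KL_divergence is used above but not defined in this file. A minimal
# sketch of what it plausibly computes for deep mutual learning -- the KL
# divergence from the (detached) peer head's distribution to the current
# head's -- assuming plain softmax without a temperature; the real helper
# may differ.
def KL_divergence(teacher_logits, student_logits):
    # KL(p_teacher || p_student), averaged over the batch
    p_teacher = F.softmax(teacher_logits, dim=1)
    log_p_student = F.log_softmax(student_logits, dim=1)
    return F.kl_div(log_p_student, p_teacher, reduction='batchmean')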
def h(sample):
    # sample[0] is the input batch, sample[1] the labels
    inputs = utils.cast(sample[0], opt.dtype).detach()
    targets = utils.cast(sample[1], 'long')
    if opt.teacher_id != '':
        # student model: train with the combined distillation loss
        y_s, y_t, loss_groups = utils.data_parallel(
            f, inputs, params, sample[2], range(opt.ngpu))
        # reduce each attention-loss group to a scalar
        loss_groups = [v.sum() for v in loss_groups]
        [m.add(v.item()) for m, v in zip(meters_at, loss_groups)]
        # first term: distillation between the student output y_s, the teacher
        # output y_t, and the ground-truth labels; second term: the
        # attention-transfer (AT) loss; y_s is returned as the student
        # prediction. For pure AT, alpha = 0 and the first term reduces to the
        # cross-entropy between the student and the labels; for pure KD,
        # beta = 0 and only the distillation term remains.
        return utils.distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
            + opt.beta * sum(loss_groups), y_s
    else:
        # teacher model: train with standard cross-entropy
        # y is the network output
        y = utils.data_parallel(f, inputs, params, sample[2], range(opt.ngpu))[0]
        return F.cross_entropy(y, targets), y
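# utils.distillation is referenced above but not shown. One common definition,
# consistent with the comments (alpha blends the softened KL term against the
# hard cross-entropy, T is the softmax temperature); treat this as a sketch
# rather than the exact implementation in utils:
def distillation(y_s, y_t, labels, T, alpha):
    p_s = F.log_softmax(y_s / T, dim=1)
    p_t = F.softmax(y_t / T, dim=1)
    # the T**2 factor keeps gradient magnitudes comparable across temperatures
    l_kl = F.kl_div(p_s, p_t, reduction='batchmean') * (T ** 2)
    l_ce = F.cross_entropy(y_s, labels)
    return alpha * l_kl + (1. - alpha) * l_ce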
def parse_set_to(expr_node):
    assert isinstance(expr_node, ast.Expr), "set_to node should be Expr"
    call_node = u.cast(expr_node.value, ast.Call)
    attribute_node = u.cast(call_node.func, ast.Attribute)
    name_node = u.cast(attribute_node.value, ast.Name)
    lhs_var = name_node.id
    possible_attributes = ["set_to", "set_to_constant", "observe_value", "set_message"]
    assert attribute_node.attr in possible_attributes, \
        "unexpected attribute " + ast.dump(attribute_node)
    op, args = u.parse_factor_expression(call_node.args[0])
    if attribute_node.attr == "set_to_constant":
        assert op is None, "set_to_constant requires a constant argument"
        op = "SetToConstant"
    elif attribute_node.attr == "set_to":
        if op is None:
            op = "Copy"
    elif attribute_node.attr == "observe_value":
        assert op is None, "observe_value requires a constant argument"
        op = "ObserveValue"
    return lhs_var, [op] + args
def h(sample):
    inputs = Variable(cast(sample[0], opt.dtype))
    targets = Variable(cast(sample[1], 'long'))
    y = data_parallel(f, inputs, params, stats, sample[2], tuple(range(opt.ngpu)))
    logit_loss = 0.5 * torch.mean(torch.sum(y * y, 1))
    return F.cross_entropy(y, targets) + opt.logitDecay * logit_loss, y
def h(sample):
    inputs = utils.cast(sample[0], opt.dtype).detach()
    targets = utils.cast(sample[1], 'long')
    if opt.teacher_id != '':
        y_s, y_t, loss_groups = utils.data_parallel(
            f, inputs, params, sample[2], range(opt.ngpu))
        loss_groups = [v.sum() for v in loss_groups]
        # accumulate the attention losses (at_losses) into meters_at
        [m.add(v.item()) for m, v in zip(meters_at, loss_groups)]
        return utils.distillation(
            y_s, y_t, targets, opt.temperature,
            opt.alpha) + opt.beta * sum(loss_groups), y_s
    else:
        y = utils.data_parallel(f, inputs, params, sample[2], range(opt.ngpu))[0]
        return F.cross_entropy(y, targets), y
def parse(soup):
    rows = soup.select('table.uk-table tr')
    result = []
    for row in rows:
        link = row.select('a.paper-link')
        if not link:
            continue
        link = link[0]
        info = row.select('dd.indexed-by')[0].text.strip().split('|')
        citations = row.select('.index-val')[0].text.strip()
        result.append({
            'title': link.text,
            'url': link['href'],
            'publisher': info[0].strip(),
            'year': utils.cast(info[3].strip()),
            'citations': utils.cast(citations) if is_integer(citations) else 0
        })
    return result
def parse(soup):
    rows = soup.select('table.uk-table tr')
    result = []
    for row in rows:
        link = row.select('a.paper-link')
        if not link:
            continue
        link = link[0]
        info1 = row.select('.index-val')
        quartile = info1[0].text.strip()
        citations = info1[1].text.strip()
        info2 = row.select('dd.indexed-by')[0].text.strip().split('|')
        result.append({
            'title': link.text,
            'url': link['href'],
            'publisher': info2[0].strip(),
            'date': info2[3].strip(),
            'type': info2[4].strip(),
            'quartile': utils.cast(quartile[1]) if re.search(r'^Q[1-4]$', quartile) else '-',
            'citations': utils.cast(citations) if is_integer(citations) else 0
        })
    return result
def parse(soup):
    rows = soup.select('table.uk-table tr')
    result = []
    for row in rows:
        link = row.select('a.paper-link')
        if not link:
            continue
        link = link[0]
        info1 = row.select('dd.indexed-by-orange')[0].text.split('|')
        dd = row.select('dd')
        info2 = [i.split(':')[1].strip() for i in dd[2].text.strip().split('\r\n')]
        members = [member.strip() for member in dd[1].text.split(',') if member.strip()]
        result.append({
            'title': link.text.strip(),
            'scheme': info1[0].split(':')[1].strip(),
            'source': info1[1].split(':')[1].strip(),
            'members': members,
            'application_year': utils.cast(info2[0]),
            'event_year': utils.cast(info2[1]),
            'fund': utils.cast(re.sub(r'[Rp\.\s\,]', '', info2[2])[:-2]),
            'field': dd[3].text.strip(),
            'sponsor': row.select('td.uk-text-center')[0].text.strip()
        })
    return result
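# Hypothetical driver for the parse() helpers above: fetch a listing page,
# build the soup, and collect the parsed rows. The URL argument is
# illustrative; requests/BeautifulSoup are assumed to be the fetch/parse
# stack used throughout this file.
import requests
from bs4 import BeautifulSoup

def fetch_and_parse(url):
    html = requests.get(url)
    soup = BeautifulSoup(html.content, 'html.parser')
    return parse(soup)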
def compute_loss_test(sample):
    inputs = cast(sample[0], args.dtype)
    targets = cast(sample[1], 'long')
    y = data_parallel(model, inputs, params, sample[2],
                      list(range(args.ngpu))).float()
    if args.dataset == "awa2":
        return F.binary_cross_entropy_with_logits(y, targets.float()), y
    else:
        return F.cross_entropy(y, targets), y
def h(sample):
    inputs = Variable(cast(sample[0], opt.dtype))
    targets = Variable(cast(sample[1], 'long'))
    if opt.teacher_id != '':
        if opt.gamma:
            ys, y_t_auto, y_t = data_parallel(f, inputs, params, stats,
                                              sample[2], np.arange(opt.ngpu))[:3]
            loss_student = F.cross_entropy(ys, targets)
            loss_teacher = F.cross_entropy(y_t_auto, targets)
            loss_course = opt.beta * ((y_t_auto - ys) * (y_t_auto - ys)).sum() / opt.batchSize
            # block gradients through the auxiliary teacher for the course term
            y_tech_temp = torch.autograd.Variable(y_t_auto.data, requires_grad=False)
            log_kd = rocket_distillation(ys, y_t, targets, opt.temperature, opt.alpha)
            loss = (log_kd + loss_teacher + loss_student
                    + opt.beta * ((y_tech_temp - ys) * (y_tech_temp - ys)).sum() / opt.batchSize)
            return loss, (ys, y_t_auto, loss_student, loss_teacher, loss_course, log_kd)
        else:
            y_s, y_t, loss_groups = data_parallel(f, inputs, params, stats,
                                                  sample[2], np.arange(opt.ngpu))
            loss_groups = [v.sum() for v in loss_groups]
            [m.add(v.data[0]) for m, v in zip(meters_at, loss_groups)]
            return distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
                + opt.beta * sum(loss_groups), y_s
    else:
        if opt.gamma:
            ys, y = data_parallel(f, inputs, params, stats,
                                  sample[2], np.arange(opt.ngpu))[:2]
            loss_student = F.cross_entropy(ys, targets)
            loss_teacher = F.cross_entropy(y, targets)
            loss_course = opt.beta * ((y - ys) * (y - ys)).sum() / opt.batchSize
            if opt.grad_block:
                # optionally stop gradients flowing into the course target
                y_course = torch.autograd.Variable(y.data, requires_grad=False)
            else:
                y_course = y
            loss = (loss_teacher + loss_student
                    + opt.beta * ((y_course - ys) * (y_course - ys)).sum() / opt.batchSize)
            return loss, (ys, y, loss_student, loss_teacher, loss_course)
        else:
            y = data_parallel(f, inputs, params, stats, sample[2], np.arange(opt.ngpu))[0]
            return F.cross_entropy(y, targets), y
def h_ensemble(sample):
    inputs = Variable(cast(sample[0], opt.dtype))
    targets = Variable(cast(sample[1], 'long'))
    y_grassmann = data_parallel(f_grassmann, inputs, params_grassmann,
                                stats_grassmann, sample[2], np.arange(opt.ngpu))
    y_oblique = data_parallel(f_oblique, inputs, params_oblique,
                              stats_oblique, sample[2], np.arange(opt.ngpu))
    y_ensemble = y_grassmann + y_oblique
    return F.cross_entropy(y_ensemble, targets), y_ensemble
def h(sample):
    inputs = Variable(cast(sample[0], opt.dtype))
    targets = Variable(cast(sample[1], 'long'))
    if opt.teacher_id != '':
        y_s, y_t, loss_groups = data_parallel(f, inputs, params, stats,
                                              sample[2], np.arange(opt.ngpu))
        loss_groups = [v.sum() for v in loss_groups]
        [m.add(v.data[0]) for m, v in zip(meters_at, loss_groups)]
        return distillation(y_s, y_t, targets, opt.temperature, opt.alpha) \
            + opt.beta * sum(loss_groups), y_s
    else:
        y = data_parallel(f, inputs, params, stats, sample[2], np.arange(opt.ngpu))[0]
        return F.cross_entropy(y, targets), y
def affil_worker(affil_id, worker_result):
    url = f'http://sinta.ristekbrin.go.id/affiliations/detail?id={affil_id}&view=overview'
    html = get(url)
    soup = BeautifulSoup(html.content, 'html.parser')
    name = soup.select('.au-name')[0].text.strip()
    affil_url = soup.select('.au-department > a')[0].text.strip()
    stats = [
        utils.cast(soup.select('.stat2-val')[i].text.strip().replace(',', ''))
        for i in range(9)
    ]
    result_data = {
        'name': name,
        'url': affil_url,
        'score': {
            'overall': stats[0],
            'overall_v2': stats[1],
            '3_years': stats[3],
            '3_years_v2': stats[4]
        },
        'rank': {
            'national': stats[2],
            '3_years_national': stats[5]
        },
        'journals': stats[6],
        'verified_authors': stats[7],
        'lecturers': stats[8]
    }
    worker_result.append(result_data)
def _analyse(self):
    # list all sections
    sections = self.r2ob.cmdj("Sj")
    sm = [a for a in sections if self.sym_sect in a['name']]
    if sm:
        sm = sm[0]
    else:
        return
    # get dynsym section as binary string
    sym_sect = u.bytes2str(
        self.r2ob.cmdj("pcj %i@%i" % (sm[self.size_type], sm[self.addr_type])))
    # cast to Elf_Sym structures
    symstr_sect_c = c.create_string_buffer(sym_sect)
    symstr_sect_l = []
    for i in range(len(sym_sect) // c.sizeof(self.Elf_Sym)):
        offset = i * c.sizeof(self.Elf_Sym)
        symstr_sect_l.append((u.cast(symstr_sect_c, offset, self.Elf_Sym), offset))
    # get dynstr section as binary string
    ss = [a for a in sections if self.symstr_sect in a['name']][0]
    ss_sect = u.bytes2str(
        self.r2ob.cmdj("pcj %i@%i" % (ss[self.size_type], ss[self.addr_type])))
    self.symbols = self._parse_symbols(symstr_sect_l, ss_sect,
                                       sm[self.addr_type], ss[self.addr_type])
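# u.cast above reinterprets a slice of a raw buffer as a ctypes structure.
# A plausible one-liner for such a helper, assuming the (buffer, offset,
# structure_type) signature seen at the call sites; the real u.cast may differ:
def cast(buf, offset, struct_type):
    # view the bytes at buf+offset as struct_type without copying
    return c.cast(c.byref(buf, offset), c.POINTER(struct_type)).contents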
def resnet(depth, width, num_classes, dropout):
    assert (depth - 4) % 6 == 0, 'depth should be 6n+4'
    n = (depth - 4) // 6
    widths = [int(v * width) for v in (16, 32, 64)]

    def gen_block_params(ni, no):
        return {
            'conv0': utils.conv_params(ni, no, 3),
            'conv1': utils.conv_params(no, no, 3),
            'bn0': utils.bnparams(ni),
            'bn1': utils.bnparams(no),
            'convdim': utils.conv_params(ni, no, 1) if ni != no else None,
        }

    def gen_group_params(ni, no, count):
        return {'block%d' % i: gen_block_params(ni if i == 0 else no, no)
                for i in range(count)}

    flat_params = utils.cast(utils.flatten({
        'conv0': utils.conv_params(3, 16, 3),
        'group0': gen_group_params(16, widths[0], n),
        'group1': gen_group_params(widths[0], widths[1], n),
        'group2': gen_group_params(widths[1], widths[2], n),
        'bn': utils.bnparams(widths[2]),
        'fc': utils.linear_params(widths[2], num_classes),
    }))

    utils.set_requires_grad_except_bn_(flat_params)

    def block(x, params, base, mode, stride):
        o1 = F.relu(utils.batch_norm(x, params, base + '.bn0', mode), inplace=True)
        y = F.conv2d(o1, params[base + '.conv0'], stride=stride, padding=1)
        o2 = F.relu(utils.batch_norm(y, params, base + '.bn1', mode), inplace=True)
        if dropout > 0:
            o2 = F.dropout(o2, p=dropout, training=mode, inplace=False)
        z = F.conv2d(o2, params[base + '.conv1'], stride=1, padding=1)
        if base + '.convdim' in params:
            return z + F.conv2d(o1, params[base + '.convdim'], stride=stride)
        else:
            return z + x

    def group(o, params, base, mode, stride):
        for i in range(n):
            o = block(o, params, '%s.block%d' % (base, i), mode,
                      stride if i == 0 else 1)
        return o

    def f(input, params, mode):
        x = F.conv2d(input, params['conv0'], stride=2, padding=1)
        g0 = group(x, params, 'group0', mode, 1)
        g1 = group(g0, params, 'group1', mode, 2)
        g2 = group(g1, params, 'group2', mode, 2)
        o = F.relu(utils.batch_norm(g2, params, 'bn', mode))
        o = F.avg_pool2d(o, 12, 1, 0)
        o = o.view(o.size(0), -1)
        o = F.linear(o, params['fc.weight'], params['fc.bias'])
        return o

    return f, flat_params
def h(sample):
    global _outputs, _loss
    inputs = cast(sample[0], opt.dtype)
    targets = cast(sample[1], 'long')
    _outputs = data_parallel(f, inputs, params, sample[2], list(range(opt.ngpu)))
    _outputs = [o.float() for o in _outputs]
    _loss = []
    for o in _outputs:
        # hard supervision against the labels for every head
        _loss.append(F.cross_entropy(o, targets))
        # soft supervision: KL from every other (detached) head
        for o2 in _outputs:
            if o is not o2:
                _loss.append(KL_divergence(o2.detach(), o))
    loss = sum(_loss)
    return loss, _outputs[-1]
def set(self, timer_number, *args):
    """
    This can be called either as set(timer_number, attribute_map) or as
    set(timer_number, attribute_name, new_value). The former sets all of
    the attributes in the specified map on the specified timer to the
    specified values. The latter sets a single attribute.

    A quick note: to make it easier to set timer state via autosend, a
    timer's state can be one of "up", "down", or "stopped", which will be
    translated by this function to 1, 2, and 3. Normal programs using
    timerd via the autobus2 module should stick with the numeric constants
    where possible, though, as these names may change. This name
    translation also works for the set_attribute function.

    An additional note: "stop" is accepted as a synonym for "stopped" in
    the above translation. I just added support for that.
    """
    if len(args) == 1:
        attributes, = args
        cast(attributes, dict)
    else:
        attributes = {args[0]: args[1]}
    timer = timer_map[timer_number]
    if "announce_interval" in attributes:
        cast(attributes["announce_interval"], int, long)
        timer.announce_interval = attributes["announce_interval"]
    if "name" in attributes:
        cast(attributes["name"], basestring)
        timer.name = attributes["name"]
    if "announce_on_state_change" in attributes:
        cast(attributes["announce_on_state_change"], bool)
        timer.announce_on_state_change = attributes["announce_on_state_change"]
    if "announce_count" in attributes:
        cast(attributes["announce_count"], int, long)
        timer.announce_count = attributes["announce_count"]
    if "state" in attributes:
        if isinstance(attributes["state"], basestring):
            attributes["state"] = {"up": 1, "down": 2,
                                   "stop": 3, "stopped": 3}[attributes["state"]]
        if attributes["state"] != timer.state:
            timer.set_state(attributes["state"])
            timer.on_manual_state_change()
            # state_change_event(timer.number, timer.state)
    publish_timer_object()
def resnet(depth, width, num_classes):
    assert (depth - 4) % 6 == 0, 'depth should be 6n+4'
    n = (depth - 4) // 6
    widths = [int(v * width) for v in (16, 32, 64)]

    def gen_block_params(ni, no):
        return {
            'conv0': utils.conv_params(ni, no, 3),
            'conv1': utils.conv_params(no, no, 3),
            'bn0': utils.bnparams(ni),
            'bn1': utils.bnparams(no),
            'convdim': utils.conv_params(ni, no, 1) if ni != no else None,
        }

    def gen_group_params(ni, no, count):
        return {'block%d' % i: gen_block_params(ni if i == 0 else no, no)
                for i in range(count)}

    flat_params = utils.cast(utils.flatten({
        'conv0': utils.conv_params(3, 16, 3),
        'group0': gen_group_params(16, widths[0], n),
        'group1': gen_group_params(widths[0], widths[1], n),
        'group2': gen_group_params(widths[1], widths[2], n),
        'bn': utils.bnparams(widths[2]),
        'fc': utils.linear_params(widths[2], num_classes),
    }))

    utils.set_requires_grad_except_bn_(flat_params)

    def block(x, params, base, mode, stride):
        o1 = F.relu(utils.batch_norm(x, params, base + '.bn0', mode), inplace=True)
        y = F.conv2d(o1, params[base + '.conv0'], stride=stride, padding=1)
        o2 = F.relu(utils.batch_norm(y, params, base + '.bn1', mode), inplace=True)
        z = F.conv2d(o2, params[base + '.conv1'], stride=1, padding=1)
        if base + '.convdim' in params:
            return z + F.conv2d(o1, params[base + '.convdim'], stride=stride)
        else:
            return z + x

    def group(o, params, base, mode, stride):
        for i in range(n):
            o = block(o, params, '%s.block%d' % (base, i), mode,
                      stride if i == 0 else 1)
        return o

    def f(input, params, mode):
        x = F.conv2d(input, params['conv0'], padding=1)
        g0 = group(x, params, 'group0', mode, 1)
        g1 = group(g0, params, 'group1', mode, 2)
        g2 = group(g1, params, 'group2', mode, 2)
        o = F.relu(utils.batch_norm(g2, params, 'bn', mode))
        o = F.avg_pool2d(o, 8, 1, 0)
        o = o.view(o.size(0), -1)
        o = F.linear(o, params['fc.weight'], params['fc.bias'])
        return o

    return f, flat_params
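# A minimal sketch of driving one of these functional resnets (CIFAR-style
# 32x32 RGB input assumed; the depth/width values are illustrative):
f, flat_params = resnet(depth=28, width=10, num_classes=10)
x = torch.randn(4, 3, 32, 32)
logits = f(x, flat_params, mode=True)  # mode=True selects training-mode batch norm
print(logits.shape)                    # expected: torch.Size([4, 10])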
def parse(self, assign_node):
    if len(assign_node.targets) > 1:
        return False
    if u.is_constant_definition(assign_node):
        return None
    self.name = assign_node.targets[0].id
    rhs = assign_node.value
    if isinstance(rhs, ast.Call):
        call_node = u.cast(rhs, ast.Call)
        self.parse_call(call_node)
        self.array_size = None
    elif isinstance(rhs, ast.Subscript):
        subscript_node = u.cast(rhs, ast.Subscript)
        call_node = u.cast(subscript_node.value, ast.Call)
        self.parse_call(call_node)
        self.array_size = u.get_index(subscript_node)
def h(sample):
    inputs = utils.cast(sample[0], opt.dtype).detach()
    targets = utils.cast(sample[1], 'long')
    if opt.teacher_id != '':
        if opt.kt_method == "at":
            y_s, y_t, loss_groups = utils.data_parallel(
                f, inputs, params, sample[2], range(opt.ngpu))
            loss_groups = [v.sum() for v in loss_groups]
            [m.add(v.item()) for m, v in zip(meters_at, loss_groups)]
            return utils.distillation(
                y_s, y_t, targets, opt.temperature,
                opt.alpha) + opt.beta * sum(loss_groups), y_s
        elif opt.kt_method == "st":
            y_s, y_t, loss_groups = utils.data_parallel(
                f, inputs, params, sample[2], range(opt.ngpu))
            return torch.sqrt(torch.mean((y_s - y_t) ** 2)), y_s
    else:
        y = utils.data_parallel(f, inputs, params, sample[2], range(opt.ngpu))[0]
        return F.cross_entropy(y, targets), y
def _analyse(self):
    elf_phdr = u.bytes2str(
        self.r2ob.cmdj("pcj %i@%i" % (self.Elf_Phdr_size * self.phnum, self.phoff)))
    elf_phdr_c = c.create_string_buffer(elf_phdr)
    segments_l = []
    for i in range(len(elf_phdr) // self.Elf_Phdr_size):
        offset = i * self.Elf_Phdr_size
        segments_l.append((u.cast(elf_phdr_c, offset, self.Elf_Phdr), offset))
    self.phdrs = [i for i in self._parse_segments(segments_l)]
def _analyse(self):
    elf_dyn = u.bytes2str(
        self.r2ob.cmdj(
            "pcj %i@%i" % (self.Elf_Dyn_size * self.Elf_Dyn_num, self.dynseg_off)))
    elf_dyn_c = c.create_string_buffer(elf_dyn)
    dyns = []
    for i in range(len(elf_dyn) // self.Elf_Dyn_size):
        offset = i * self.Elf_Dyn_size
        dyns.append((u.cast(elf_dyn_c, offset, self.Elf_Dyn), offset))
    self.dyns = [i for i in self._parse_dyns(dyns)]
def author_comm_services(author_id, output_format='dictionary', pretty_print=None,
                         xml_library='dicttoxml', max_workers=None):
    url = f'http://sinta.ristekbrin.go.id/authors/detail?id={author_id}&view=services'
    html = get(url)
    soup = BeautifulSoup(html.content, 'html.parser')
    page_info = soup.select('.uk-width-large-1-2.table-footer')
    n_page = utils.cast(page_info[0].text.strip().split()[3])
    worker_result = parse(soup)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for page in range(2, n_page + 1):
            executor.submit(worker, author_id, page, worker_result)
    return utils.format_output(worker_result, output_format, pretty_print, xml_library)
def _analyse_reln(self, addend=False):
    r_c = self.relsz[0]["d_val"] if not addend else self.relasz[0]["d_val"]
    v_addr = self.rel[0]["d_ptr"] if not addend else self.rela[0]["d_ptr"]
    sz = self.Elf_Rel_size if not addend else self.Elf_Rela_size
    reln = self.Elf_Rel if not addend else self.Elf_Rela
    relocs = u.bytes2str(self.r2ob.cmdj("pcj %i@%i" % (sz * r_c, v_addr)))
    relocs_c = c.create_string_buffer(relocs)
    rels = []
    for i in range(len(relocs) // sz):
        offset = i * sz
        rels.append((u.cast(relocs_c, offset, reln), v_addr + offset))
    for i in self._parse_reln(rels, addend):
        self.relocs.append(i)
def make_switch_group(if_node, parent_case):
    cases_ast = u.if_and_or_else_blocks(if_node)
    switch_group = SwitchGroup(None, parent_case.num_switch_groups())
    switch_group.set_parent_case(parent_case)
    switch_group.var_name = None
    for if_node in cases_ast:
        compare_node = u.cast(if_node.test, ast.Compare)
        var_name, val = u.parse_compare(compare_node)
        if switch_group.var_name is None:
            switch_group.var_name = var_name
        else:
            assert var_name == switch_group.var_name, "if blocks must switch on same var"
        case_node = make_case_node(if_node.body, var_name, val)
        switch_group.add_case(val, case_node)
    return switch_group
def parse_ssearch36(lines, numeric=False):
    """
    Parse output of 'ssearch36 -m 10 query.fasta library.fasta'

    Return an iterator of dicts containing alignment data read from
    file-like object `lines`. Coerce strings to ints or floats if
    numeric is True. Each alignment of a single query sequence to
    multiple targets (i.e., different target sequences or multiple
    regions within the same target) is represented by an element in
    the output; use 'groupby(results, key=lambda hit: hit["q_name"])'
    to group by query.

    Note: use 'ssearch36 -a' to retain full sequence.
    """
    query_count = 0
    hit_count = 0
    keeplines = False
    prefix = None
    hit = defaultdict()

    # enforce utf-8 encoding
    lines = (l.decode('iso8859-1').encode('utf-8') for l in lines)

    for line in lines:
        line = line.rstrip('\n')
        if line.startswith('>>><<<'):
            # query end
            keeplines = False
        elif line.startswith('>>>'):
            # start of a new hit
            if not line.startswith('>>>///'):
                query_count += 1
                q_name = line.lstrip('>').split(',')[0]
        elif line.startswith('>>') or line.startswith('>--'):
            # hit-specific results; keep results starting here
            if prefix:
                yield hit
                hit_count += 1
            if line.startswith('>>'):
                t_description = line[2:]
                t_name = t_description.split()[0]
            prefix = ''
            keeplines = True
            hit = {'q_name': q_name, 't_name': t_name,
                   't_description': t_description,
                   'q_seq': '', 't_seq': ''}
        elif line.startswith('>'):
            prefix = 't_' if prefix else 'q_'
        elif line.startswith(';') and keeplines:
            k, v = line.lstrip('; ').split(':', 1)
            k = k.replace(gap, '').replace(' ', '_').lower()
            if k == 'al_cons':
                hit[k] = ''
            else:
                hit[prefix + k] = utils.cast(v) if numeric else v.strip()
        elif prefix and keeplines:
            if 'al_cons' in hit:
                hit['al_cons'] += line
            else:
                hit[prefix + 'seq'] += line.strip()
        else:
            continue

    if hit:
        yield hit

    log.info('%s queries, %s hits' % (query_count, hit_count))
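# Hypothetical usage of parse_ssearch36, grouping hits by query as the
# docstring suggests (the output file name is illustrative; the file is
# opened in binary mode because the parser decodes each line itself):
from itertools import groupby

with open('ssearch36.out', 'rb') as lines:
    results = list(parse_ssearch36(lines, numeric=True))
for q_name, hits in groupby(results, key=lambda hit: hit['q_name']):
    print(q_name, len(list(hits)))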
def h(sample):
    inputs = cast(sample[0], opt.dtype)
    targets = cast(sample[1], 'long')
    y = data_parallel(f, inputs, params, sample[2], list(range(opt.ngpu))).float()
    return F.cross_entropy(y, targets), y
def worker(author_id, worker_result):
    url = f'http://sinta.ristekbrin.go.id/authors/detail?id={author_id}&view=overview'
    html = get(url)
    soup = BeautifulSoup(html.content, 'html.parser')
    name = soup.select('.au-name')[0].text.strip()
    areas = [area.text.strip() for area in soup.select('.area-item')]
    scores_soup = soup.select('.stat2-val')
    index_score_names = ['documents', 'citations', 'h-index', 'i10-index', 'g-index']
    index_scores = soup.select('.stat-num-pub')
    scopus = {
        index_score_names[i]: utils.cast(index_scores[i + 16].text)
        for i in range(len(index_score_names))
    }
    scopus_outputs_names = ['articles', 'conferences', 'others']
    scopus_outputs = {
        scopus_outputs_names[i]: utils.cast(soup.select('.stat-num-pub')[i].text)
        for i in range(3)
    }
    scopus_quartiles = {
        f'Q{i}': utils.cast(soup.select('.stat-num-pub')[i + 3].text)
        for i in range(1, 5)
    }
    scopus = {**scopus, **scopus_outputs, **scopus_quartiles}
    scopus['undefined'] = utils.cast(soup.select('.stat-num-pub')[8].text)
    scholar = {
        index_score_names[i]: utils.cast(index_scores[i + 21].text)
        for i in range(len(index_score_names))
    }
    wos = {
        index_score_names[i]: utils.cast(index_scores[i + 26].text)
        for i in range(len(index_score_names))
    }
    sinta = {f'S{i}': utils.cast(index_scores[i + 9].text) for i in range(6)}
    sinta['uncategorized'] = utils.cast(index_scores[15].text)
    score_names = ['overall', '3_years', 'overall_v2', '3_years_v2']
    scores = {
        score_name: float(scores_soup[i].text)
        for i, score_name in enumerate(score_names)
    }
    books = utils.cast(scores_soup[4].text)
    ipr = utils.cast(scores_soup[7].text)
    rank_names = ['national', '3_years_national', 'ipr',
                  'affiliation', '3_years_affiliation']
    ranks = {
        rank_names[i]: utils.cast(scores_soup[i + 5].text)
        for i in [0, 1, 3, 4]
    }
    affil = soup.select('.au-affil > a')
    dept = soup.select('.au-department')[0].text.strip()
    affil_name = affil[0].text.strip()
    affil_url = 'http://sinta.ristekbrin.go.id' + affil[0]['href']
    affil_id = re.search(r'id=(\d+)', affil_url).group(1)
    result_data = {
        'id': author_id,
        'name': name,
        'url': url,
        'affiliation': {
            'id': affil_id,
            'name': affil_name,
            'url': affil_url
        },
        'department': dept,
        'areas': areas,
        'score': scores,
        'rank': ranks,
        'scopus': scopus,
        'scholar': scholar,
        'wos': wos,
        'sinta': sinta,
        'books': books,
        'ipr': ipr
    }
    worker_result.append(result_data)
def h(sample):
    inputs = Variable(cast(sample[0], opt.dtype))
    targets = Variable(cast(sample[1], 'long'))
    y = data_parallel(f, inputs, params, stats, sample[2], list(range(opt.ngpu)))
    return F.cross_entropy(y, targets), y
def compute_loss(sample):
    if not args.ssl:
        inputs = cast(sample[0], args.dtype)
        targets = cast(sample[1], 'long')
        y = data_parallel(model, inputs, params, sample[2],
                          list(range(args.ngpu))).float()
        if args.dataset == "awa2":
            return F.binary_cross_entropy_with_logits(y, targets.float()), y
        else:
            return F.cross_entropy(y, targets), y
    else:
        global counter
        # labelled (l) and unlabelled (u) halves of the batch
        l = sample[0]
        u = sample[1]
        inputs_l = cast(l[0], args.dtype)
        targets_l = cast(l[1], 'long')
        inputs_u = cast(u[0], args.dtype)
        y_l = data_parallel(model, inputs_l, params, sample[2],
                            list(range(args.ngpu))).float()
        y_u = data_parallel(model, inputs_u, params, sample[2],
                            list(range(args.ngpu))).float()
        if args.dataset == "awa2":
            loss = F.binary_cross_entropy_with_logits(y_l, targets_l.float())
        else:
            loss = F.cross_entropy(y_l, targets_l)
        if args.min_entropy:
            # entropy minimization on the unlabelled predictions
            if args.dataset == "awa2":
                labels_pred = F.sigmoid(y_u)
            else:
                labels_pred = F.softmax(y_u, dim=1)
            entropy = -torch.sum(labels_pred * torch.log(labels_pred), dim=1)
            if counter >= 10:
                loss_entropy = args.unl_weight * torch.mean(entropy)
                loss += loss_entropy
        elif args.semantic_loss:
            if args.dataset == "awa2":
                labels_pred = F.sigmoid(y_u)
            else:
                labels_pred = F.softmax(y_u, dim=1)
            part1 = torch.stack([labels_pred ** all_labels[i]
                                 for i in range(all_labels.shape[0])])
            part2 = torch.stack([(1 - labels_pred) ** (1 - all_labels[i])
                                 for i in range(all_labels.shape[0])])
            sem_loss = -torch.log(torch.sum(torch.prod(part1 * part2, dim=2), dim=0))
            if counter >= 10:
                semantic_loss = args.unl_weight * torch.mean(sem_loss)
                loss += semantic_loss
        elif args.lp:
            model_y.eval()
            if args.dataset == "awa2":
                labels_pred = F.sigmoid(y_u)
            else:
                labels_pred = F.softmax(y_u, dim=1)
            if num_classes % 2:
                # pad to an even number of classes for the flow model
                labels_pred = torch.cat(
                    (labels_pred,
                     torch.zeros((labels_pred.shape[0], 1)).to("cuda:0")), dim=1)
            _, nll_ypred = model_y(labels_pred)
            if counter >= 10:
                loss_nll_ypred = args.unl_weight * torch.mean(nll_ypred)
                loss += loss_nll_ypred
            model_y.train()
            optimizer_y.zero_grad()
            if args.dataset == "awa2":
                a = targets_l.float() * 120. + (1 - targets_l.float()) * 1.1
                b = (1 - targets_l.float()) * 120. + targets_l.float() * 1.1
                beta_targets = Beta(a, b).rsample()
                if num_classes % 2:
                    beta_targets = torch.cat(
                        (beta_targets,
                         torch.zeros((beta_targets.shape[0], 1)).to("cuda:0")), dim=1)
                zs, nll_y = model_y(beta_targets)
            else:
                one_hot_targets = F.one_hot(torch.tensor(targets_l), num_classes).float()
                one_hot_targets = one_hot_targets * 120 + (1 - one_hot_targets) * 1.1
                dirichlet_targets = torch.stack(
                    [Dirichlet(i).sample() for i in one_hot_targets])
                zs, nll_y = model_y(dirichlet_targets)
            loss_nll_y = torch.mean(nll_y)
            loss_nll_y.backward()
            optimizer_y.step()
        return loss, y_l
def __init__(self, val):
    self.val = utils.cast(val, 'unsigned long')
def resnet(depth, width, num_classes, dropout, level=None):
    assert (depth - 4) % 6 == 0, 'depth should be 6n+4'
    assert level is None or level in [2, 3], 'level should be 2, 3 or None'
    n = (depth - 4) // 6
    widths = [int(v * width) for v in (16, 32, 64)]

    def gen_harmonic_params(ni, no, k, normalize=False, level=None, linear=False):
        nf = k ** 2 if level is None else level * (level + 1) // 2
        paramdict = {
            'conv': utils.dct_params(ni, no, nf) if linear
            else utils.conv_params(ni * nf, no, 1)
        }
        if normalize and not linear:
            paramdict.update({'bn': utils.bnparams(ni * nf, affine=False)})
        return paramdict

    def gen_block_params(ni, no):
        return {
            'harmonic0': gen_harmonic_params(ni, no, k=3, normalize=False,
                                             level=level, linear=True),
            'harmonic1': gen_harmonic_params(no, no, k=3, normalize=False,
                                             level=level, linear=True),
            'bn0': utils.bnparams(ni),
            'bn1': utils.bnparams(no),
            'convdim': utils.conv_params(ni, no, 1) if ni != no else None,
        }

    def gen_group_params(ni, no, count):
        return {'block%d' % i: gen_block_params(ni if i == 0 else no, no)
                for i in range(count)}

    flat_params = utils.cast(utils.flatten({
        'dct0': utils.dct_filters(n=3, groups=3),
        'dct': utils.dct_filters(n=3, groups=int(width) * 64,
                                 expand_dim=0, level=level),
        'harmonic0': gen_harmonic_params(3, 16, k=3, normalize=True, level=None),
        'group0': gen_group_params(16, widths[0], n),
        'group1': gen_group_params(widths[0], widths[1], n),
        'group2': gen_group_params(widths[1], widths[2], n),
        'bn': utils.bnparams(widths[2]),
        'fc': utils.linear_params(widths[2], num_classes),
    }))

    utils.set_requires_grad_except_bn_(flat_params)

    def harmonic_block(x, params, base, mode, stride=1, padding=1):
        y = F.conv2d(x, params['dct0'], stride=stride, padding=padding,
                     groups=x.size(1))
        if base + '.bn.running_mean' in params:
            y = utils.batch_norm(y, params, base + '.bn', mode, affine=False)
        z = F.conv2d(y, params[base + '.conv'], padding=0)
        return z

    def lin_harmonic_block(x, params, base, mode, stride=1, padding=1):
        filt = torch.sum(params[base + '.conv'] * params['dct'][:x.size(1), ...], dim=2)
        y = F.conv2d(x, filt, stride=stride, padding=padding)
        return y

    def block(x, params, base, mode, stride):
        o1 = F.relu(utils.batch_norm(x, params, base + '.bn0', mode), inplace=True)
        y = lin_harmonic_block(o1, params, base + '.harmonic0', mode,
                               stride=stride, padding=1)
        o2 = F.relu(utils.batch_norm(y, params, base + '.bn1', mode), inplace=True)
        if dropout > 0:
            o2 = F.dropout(o2, p=dropout, training=mode, inplace=False)
        z = lin_harmonic_block(o2, params, base + '.harmonic1', mode,
                               stride=1, padding=1)
        if base + '.convdim' in params:
            return z + F.conv2d(o1, params[base + '.convdim'], stride=stride)
        else:
            return z + x

    def group(o, params, base, mode, stride):
        for i in range(n):
            o = block(o, params, '%s.block%d' % (base, i), mode,
                      stride if i == 0 else 1)
        return o

    def f(input, params, mode):
        x = harmonic_block(input, params, 'harmonic0', mode, stride=2, padding=1)
        g0 = group(x, params, 'group0', mode, 1)
        g1 = group(g0, params, 'group1', mode, 2)
        g2 = group(g1, params, 'group2', mode, 2)
        o = F.relu(utils.batch_norm(g2, params, 'bn', mode))
        o = F.avg_pool2d(o, 12, 1, 0)
        o = o.view(o.size(0), -1)
        o = F.linear(o, params['fc.weight'], params['fc.bias'])
        return o

    return f, flat_params
def check_high_missing_values(self, X_train, X_test, features_type, min_missing):
    """
    Check each feature's proportion of missing values. Features with at
    most `min_missing` missing values are imputed (qualitative features
    are filled with the word 'other'); features above that threshold are
    removed.

    Parameters
    ----------
    X_train : pd.DataFrame
        Train set information
    X_test : pd.DataFrame
        Test set information
    features_type : dict[str : list[str]]
        Dictionary that contains two keys: qualitative and quantitative.
        The values are the lists of feature names respectively.
    min_missing : float
        Maximum proportion of missing values allowed before a feature
        is removed.

    Return
    ------
    X_train : pd.DataFrame
        Train set information
    X_test : pd.DataFrame
        Test set information
    features_type : dict[str : list[str]]
        Dictionary that contains two keys: qualitative and quantitative.
        The values are the lists of feature names respectively.
    html : str
        Processing report accumulated so far.
    """
    X_train_c = X_train.copy()
    X_test_c = X_test.copy()

    if not self.html:
        self.html = """<html><head>"""
        #self.html += """<link rel = "stylesheet" href = "style.css"/>"""
        self.html += """</head><body><h1><center>Processing Report</center></h1>"""
    if not self.logger:
        self.logger = log('../data/output/', 'logs.txt')
    self.html += "<h2><center>Check missing values:</center></h2>"

    vars_remove = []
    # computing the proportion of nulls per feature
    prop_missing = X_train_c.isnull().mean()
    for x in np.hstack(list(features_type.values())):
        # if the feature has missing values but their proportion is at most
        # min_missing, the values are imputed; otherwise the feature is removed
        if 0 < prop_missing.loc[x] <= min_missing:
            if x in features_type['qualitative']:
                val_imputer = 'other'
                X_train_c[x] = X_train_c[x].fillna(val_imputer)
                X_test_c[x] = X_test_c[x].fillna(val_imputer)
                str_ = 'Feature ' + x + ' was imputed with "' + val_imputer + '"'
                self.logger.info(str_)
                self.html += str_ + '<br>'
        elif prop_missing.loc[x] > min_missing:
            vars_remove.append(x)
        else:
            pass

    if len(vars_remove) == 0:
        self.html += """No feature was removed"""
        self.logger.info('No features were removed because of missing values')
    else:
        X_train_c = X_train_c.drop(columns=vars_remove)
        X_test_c = X_test_c.drop(columns=vars_remove)
        quali_vars = features_type['qualitative']
        quanti_vars = features_type['quantitative']
        features_type = {}
        features_type['qualitative'] = [x for x in quali_vars if x not in vars_remove]
        features_type['quantitative'] = [x for x in quanti_vars if x not in vars_remove]
        self.logger.info('Features: ' + str(vars_remove) +
                         ' were removed because of missing values')

    self.logger.info('Check of missing values finished!')
    self.logger.info('Cast features is starting...')
    X_train_c = cast(X_train_c, features_type)
    X_test_c = cast(X_test_c, features_type)
    self.logger.info('Cast features finished!')
    return X_train_c, X_test_c, features_type, self.html
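# A sketch of calling check_high_missing_values; `preprocessor` stands in for
# an instance of the class defining the method, and the feature names and the
# 0.3 threshold are illustrative:
features_type = {
    'qualitative': ['color', 'city'],
    'quantitative': ['age', 'income'],
}
X_train, X_test, features_type, report_html = preprocessor.check_high_missing_values(
    X_train, X_test, features_type, min_missing=0.3)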