def to_csv():
    # read type names
    with open(TYPES + 'types.txt') as f:
        types = lines(f)

    # read type effectiveness, put it into table format
    # [["",    type1, type2],
    #  [type1, 1,     2    ],
    #  [type2, 4,     2    ]]
    with open(TYPES + 'typestable.txt') as f:
        headers = [""] + types
        # effectiveness is a 2d array
        effectiveness = [line.split(" ") for line in lines(f)]
        table = [headers] +\
            [[types[attacker]] +\
             [effectiveness[attacker][defender]
              for defender in range(len(effectiveness[attacker]))]
             for attacker in range(len(effectiveness))]
        #print table

    # write the type effectiveness to a csv
    with open(TYPES + 'types.csv', 'wb') as f:
        typesWriter = csv.writer(f)
        typesWriter.writerows(table)
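# NOTE: every snippet in this collection calls a small lines() helper
# (imported as `lines`, `util.lines`, or `ut.lines`). Its definition is not
# included here; the sketch below is a hypothetical reconstruction based only
# on how the callers use it (it must return a list that supports len() and
# indexing, and must keep empty lines, since some callers split records on
# `line == ""`). The ut.lines() call in the ROOT plotting fragment further
# down is unrelated: there it returns a list of line styles.
def lines(source):
    """Return the lines of `source` with trailing newlines stripped.

    `source` may be an open file object or anything accepted by open(),
    such as a path string or a pathlib.Path.
    """
    if hasattr(source, "read"):  # already an open file-like object
        return source.read().splitlines()
    with open(source) as f:      # treat it as a path and open it
        return f.read().splitlines()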
def processRunning(host, pidfile, name=""):
    cmdline = util.lines(host.execute("[ -f %(pidfile)s ] && (cat %(pidfile)s | xargs -r ps --no-headers --format cmd --pid); true" % {"pidfile": util.escape(pidfile)}))
    if not len(cmdline):
        return False
    if name:
        return name in cmdline[0]
    return True  # pid file exists and a process with that pid is running
def playlist_contains(song_id, playlist_name):
    if not playlist_exists(playlist_name):
        raise KeyError(f'playlist {playlist_name} doesn\'t exist')
    playlist_dir = file_management.get_playlists_path()
    playlist_dest = os.path.join(playlist_dir, playlist_name)
    return song_id in util.lines(open(playlist_dest))
def load_splits_raw(self):
    self.split_files = [
        (
            self.conf.data_dir / self.name / split_name / self.lang
        ).with_suffix(".conllu")
        for split_name in self.split_names]
    self.splits_raw = [
        conllu.parse("\n".join(lines(f)))
        for f in self.split_files]
def do_it(filename):
    for line in util.lines(filename):
        split = line.split()
        n = Node.get_node(split[0] + split[1])
        for inner in range(4, len(split), 4):
            if split[inner] != "no":
                n.add(split[inner + 1] + split[inner + 2], int(split[inner]))
    return len(Node.nodes['shinygold'].distinct_parents())
def do_it(filename):
    program = [[l.split()[0], int(l.split()[1])] for l in util.lines(filename)]
    result = NOT_FOUND
    pos = 0
    while result == NOT_FOUND:
        curr, pos = replace_nop_jmp(program, pos)
        result = parse_and_run(curr)
    return result
def do_it(filename):
    for line in util.lines(filename):
        split = line.split()
        n = Node.get_node(''.join(split[0:2]))
        for inner in range(4, len(split), 4):
            if split[inner] != "no":
                n.add(''.join(split[inner + 1:inner + 3]), int(split[inner]))
    return Node.get_node('shinygold').inner_bags() - 1
def read_sift(sift_fname):
    """ Feature format: [[x, y, scale, orientation], ...] """
    lines = ut.lines(sift_fname)
    if len(lines):
        fd = np.array([map(float, line.split()) for line in lines])
        f = fd[:, :4]
        d = np.uint8(fd[:, 4:])
        return f, d
    else:
        return np.zeros((4, 0)), np.uint8(np.zeros((128, 0)))
def testLines(self):
    from util import lines
    f = open('test_input.txt')
    try:
        for line, item in enumerate(lines(f)):
            print('%d\t %s' % (line, item))
    finally:
        f.close()
    print("test")
    self.failUnless(1 == 1, "Failed")
def parse_file(self, file):
    sents = split_iter(lines(file), lambda line: line == "")
    sents = islice(filter(bool, sents), self.conf.max_ninst)

    def parse_sent(sent):
        parts = map_assert(
            str.split, lambda parts: len(parts) in {3, 7}, sent)
        forms, tags = zip(*map(lambda ps: (ps[0], ps[-1]), parts))
        assert len(forms) == len(tags) == len(sent)
        return [
            {"form": form, "ner": tag}
            for form, tag in zip(forms, tags)]

    return list(map_skip_assert_error(parse_sent, sents, verbose=True))
def downloadCaptureUri(host, name, onlyLatest=False):
    filename = "%s.pcap" % name
    path = host.getHostServer().randomFilename()
    if onlyLatest:
        print path
        latest = util.lines(host.execute("ls -t1 %s | head -n1" % _remoteDir(name)))[0]
        if latest:
            fileutil.copy(host, "%s/%s" % (_remoteDir(name), latest), path)
    else:
        host.execute("tcpslice -w %s %s/*" % (path, _remoteDir(name)))
    if not fileutil.existsFile(host, path) or not fileutil.fileSize(host, path):
        raise fault.new("No packages captured yet")
    return host.getHostServer().downloadGrant(path, filename=filename)
def show_playlist(playlist_name):
    filename = f'{playlist_name}.playlist'
    playlist_dir = file_management.get_playlists_path()
    playlist_dest = os.path.join(playlist_dir, filename)
    if not os.path.exists(playlist_dest):
        print(f'playlist does not exist: {playlist_dest}')
        raise KeyError
    res = [playlist_name]
    for song_id in util.lines(open(playlist_dest)):
        res.append(f'[{song_id}] {songs.get_song_info(song_id)["title"]}')
    return res
def run():
    print '''<div type="book" osisID="%s" canonical="true">
<title type="main">%s</title>''' % (bookID, title)
    for line in lines(sys.stdin):
        line = re.sub(
            chapterPattern,
            r'<chapter osisID="%s." chapterTitle="\1">\n<title type="chapter">\1</title>' % bookID,
            line)
        line = re.sub(versePattern, r'<verse osisID="%s.\1.\2">\3</verse>' % bookID, line)
        line = re.sub(r'\*\*', r'</chapter>', line)
        print line.strip()
    print '</div>'
def __init__(self, path, words, dim=300, normalize=True, **kwargs):
    seen = []
    vs = {}
    for line in lines(path):
        split = line.split()
        w = split[0]
        if w in words:
            seen.append(w)
            vs[w] = np.array(list(map(float, split[1:])), dtype='float32')
    self.iw = seen
    self.wi = {w: i for i, w in enumerate(self.iw)}
    self.m = np.vstack(vs[w] for w in self.iw)
    if normalize:
        self.normalize()
def add_song_to_playlist(song_id: str, playlist_name):
    filename = f'{playlist_name}.playlist'
    playlist_dir = file_management.get_playlists_path()
    playlist_dest = os.path.join(playlist_dir, filename)
    if not os.path.exists(playlist_dest):
        raise KeyError(f'playlist does not exist: {playlist_dest}')
    if song_id not in file_management.get_song_ids():
        raise KeyError('song id does not exist')
    current_songs = util.lines(open(playlist_dest))
    if song_id in current_songs:
        print(f'warning! song {song_id} already in current songs! type "y" to continue')
        if input('>') != 'y':
            return
    with open(playlist_dest, 'a') as playlist:
        playlist.write(song_id + '\n')
def parse(filename):
    return ContinuousWood(util.lines(filename))
def interfaceExists(host, iface):
    return util.lines(host.execute("[ -d /sys/class/net/%s ]; echo $?" % iface))[0] == "0"

def interfaceBridge(host, iface):
    return util.lines(host.execute("[ -d /sys/class/net/%s/brport/bridge ] && basename $(readlink /sys/class/net/%s/brport/bridge)" % (iface, iface)))[0]

def bridgeExists(host, bridge):
    return util.lines(host.execute("[ -d /sys/class/net/%s/brif ]; echo $?" % bridge))[0] == "0"
def get_song_ids():
    dest = get_ids_path()
    with open(dest) as f:
        return util.lines(f)
Usage: 'measure dict-file'

Pearls of Computer Science, Week 2
"""

# standard module to access command-line parameter list sys.argv
import sys
# standard module containing process_time function
import time

from ordsearch import linear
from ordsearch import binary

# read words from dictionary file
import util
words = util.lines(sys.argv[1])  # sys.argv[0] is the script name; the dict-file is the first argument

# ask for the first word
value = input("Search for first word? ")

# continue as long as a word was typed
while value != "":
    # measure time for linear searching
    lstart = time.process_time()
    lresult = linear(words, value)
    lend = time.process_time()
    # time values are fractions of seconds;
    # multiply by a million and round to get microseconds
    ltime = round((lend - lstart) * 1000000)

    # measure time for binary searching
    bstart = time.process_time()
parR = np.empty(numBins, np.dtype('float64'))
parRerr = np.empty(numBins, np.dtype('float64'))
parL = np.empty(numBins, np.dtype('float64'))
parLerr = np.empty(numBins, np.dtype('float64'))

for ibin in range(0, numBins):
    hists.append(r.TH1D("tmp_" + str(ibin), "tmp_" + str(ibin), 400, 0., 2.))
    energies.append(int(ibin * energyBin))
energies.append(maxEnergy)

if numBins > len(ut.colors()):
    for i in range(0, numBins - len(ut.colors())):
        ut.colors().append(ut.colors()[i])
        ut.alpha().append(0.3)
        ut.lines().append(ut.lines()[i])
        ut.width().append(ut.width()[i])
        ut.fill().append(3001)

leg = r.TLegend(0.25, 0.55, 0.3, 0.85)
leg.SetTextFont(132)
leg.SetTextSize(0.05)
leg.SetFillColor(0)
leg.SetFillStyle(0)

ifile = "/eos/user/c/cneubuse/miniCalo2/pred/stage" + str(st) + "/out.root"

miX = 0.8
maX = 1.18
# if args.stage>1:
#     miX=0.8
def parse_file(self, file):
    sents = split_iter(lines(file), lambda line: line == "")
    sents = islice(filter(bool, sents), self.conf.max_ninst)
    sents = map("\n".join, sents)
    return [conllu.parse(sent)[0] for sent in sents]
        continue
    print proc_num, "Running", name
    subredditgen.main(name)
    word_dict = util.load_pickle(DICTS.format(name))
    word_dict.filter_extremes(no_above=0.1, no_below=100)
    to_keep = sorted(word_dict.dfs, key=lambda w : word_dict.dfs[w], reverse=True)[:5000]
    word_dict.filter_tokens(good_ids=to_keep)
    sub_vecs = create_representation("SVD", constants.SUBREDDIT_EMBEDDINGS.format(name))
    pos_seeds, neg_seeds = seeds.twitter_seeds()
    sub_vecs = sub_vecs.get_subembed(set(word_dict.token2id.keys()).union(pos_seeds).union(neg_seeds))
    pols = polarity_induction_methods.bootstrap(sub_vecs, pos_seeds, neg_seeds, return_all=True,
                                                nn=25, beta=0.9, num_boots=50, n_procs=10)
    util.write_pickle(pols, POLARITIES + name + ".pkl")

if __name__ == "__main__":
    queue = Queue()
    id = int(sys.argv[1])
    valid_ids = set(range(250, 256))
    for i, line in enumerate(util.lines(NAMES)):
        if i in valid_ids:
            name = line.split()[0]
            queue.put(name)
    print queue.qsize()
    procs = [Process(target=worker, args=[i, queue]) for i in range(1)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
group.add_argument('-l', '--list', action="store_true")
group.add_argument('-c', '--create', type=str, metavar='<PLAYLIST>')
group.add_argument('-d', '--delete', type=str, metavar='<PLAYLIST>')
group.add_argument('-a', '--add-song', type=str, metavar='<PLAYLIST>')
group.add_argument('-r', '--remove-song', type=str, metavar='<PLAYLIST>')
parser.add_argument('-s', '--song', type=str, metavar='<SONG NAME>')

args = parser.parse_args()
# print(args)

if args.list:
    playlist_dir = file_management.get_playlists_path()
    for filename in os.listdir(playlist_dir):
        abspath = os.path.join(playlist_dir, filename)
        print(os.path.splitext(filename)[0])
        for song_id in util.lines(open(abspath)):
            print(f'[{song_id}] {songs.get_song_info(song_id)["title"]}')
elif args.create is not None:
    create_playlist(args.create)
elif args.delete is not None:
    delete_playlist(args.delete)
elif args.add_song is not None:
    if args.song is None:
        sys.exit('provide a song (-s)')
    add_song_to_playlist(args.song, args.add_song)
elif args.remove_song is not None:
    if args.song is None:
__author__ = 'Egbert'

from ordsearch import binary
from ordsearch import linear
from util import lines

import searchmeasure

print(binary(lines("Unabr.dict"), "eagle"))
print(binary(lines("Unabr.dict"), "zygose"))

searchmeasure.search("Unabr.dict", "eagle")
    songs_dir = file_management.get_songs_path()
    for filepath in glob.glob(os.path.join(songs_dir, f'{song_id}.*')):
        abspath = os.path.join(songs_dir, filepath)
        print(f'deleting {abspath}')
        os.remove(abspath)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Song configuration')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-l', '--list', action="store_true")
    group.add_argument('-a', '--add-url', type=str, metavar='<URL>')
    group.add_argument('-d', '--delete', type=str, metavar='<ID>')

    args = parser.parse_args()
    # print(args)

    if args.list:
        ids_path = file_management.get_ids_path()
        all_ids = util.lines(open(ids_path))
        if len(all_ids) == 0:
            print('no songs!')
        for song_id in all_ids:
            json_data = get_song_info(song_id)
            print(f'[{song_id}] {json_data["title"]}')
    elif args.add_url is not None:
        link = args.add_url
        add_song(link)
    elif args.delete is not None:
        song_id = args.delete
        remove_song(song_id)
def interfaceBridge(host, iface):
    try:
        return util.lines(host.execute("[ -d /sys/class/net/%s/brport/bridge ] && basename $(readlink /sys/class/net/%s/brport/bridge)" % (util.identifier(iface), util.identifier(iface))))[0]
    except exceptions.CommandError:
        return False
class Transformer():
    nspecial_symbols_segment1 = 2  # [CLS] sent1... [SEP]
    nspecial_symbols_segment2 = 1  # sent2... [SEP]
    add_tokens_key = 'additional_special_tokens'
    supported_langs = set(
        lines(Path(__file__).parent / "data" / "bert_langs.wiki"))

    def __init__(self,
                 model_name,
                 device=None,
                 max_len=None,
                 auto_model_cls=AutoModel,
                 only_tokenizer=False,
                 custom_n_hidden=None,
                 custom_n_layers=None):
        super().__init__()
        self.randinit = model_name.endswith('-randinit')
        if self.randinit:
            model_name = model_name[:-len('-randinit')]
        self.model_name = model_name
        self.device = device or _device
        do_lower_case = "uncased" in model_name
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name, do_lower_case=do_lower_case)
        for name in 'mask cls sep bos eos'.split():
            token = getattr(self.tokenizer, name + '_token')
            setattr(self, name.upper(), token)
        # self.begin_mention_idx = self.tokenizer.convert_tokens_to_ids(
        #     self.BEGIN_MENTION)
        if self.model_name.startswith('roberta'):
            self.BEGIN_MENTION = 'madeupword0000'
            self.END_MENTION = 'madeupword0001'
            self.add_special_symbols = self.add_special_symbols_roberta
        else:
            self.BEGIN_MENTION = '[unused0]'
            self.END_MENTION = '[unused1]'
            self.add_special_symbols = self.add_special_symbols_bert
        self.BEGIN_MENTION_IDX = self.tokenizer.convert_tokens_to_ids(
            self.BEGIN_MENTION)
        self.begin_mention_idx = self.BEGIN_MENTION_IDX
        self.END_MENTION_IDX = self.tokenizer.convert_tokens_to_ids(
            self.END_MENTION)
        additional_special_tokens = [self.BEGIN_MENTION, self.END_MENTION]
        self.tokenizer.add_special_tokens(
            {self.add_tokens_key: additional_special_tokens})
        self.max_len = max_len or self.tokenizer.max_len
        self.pad_idx = self.tokenizer.pad_token_id
        self.mask_idx = self.tokenizer.mask_token_id
        self.vocab_size = len(self.tokenizer)
        if not only_tokenizer:
            if self.randinit:
                model_config = AutoConfig.from_pretrained(self.model_name)
                print('creating model with random init', self.model_name)
                if custom_n_hidden:
                    ratio = model_config.intermediate_size // model_config.hidden_size
                    model_config.hidden_size = custom_n_hidden
                    model_config.intermediate_size = ratio * custom_n_hidden
                if custom_n_layers:
                    model_config.num_hidden_layers = custom_n_layers
                self.model = auto_model_cls.from_config(model_config)
                print('custom model_config:', model_config)
            else:
                print('loading model', self.model_name)
                self.model = auto_model_cls.from_pretrained(model_name)
            word_emb = self.model.get_input_embeddings().weight
            self.dim = word_emb.size(1)
            device_count = torch.cuda.device_count()
            self.model.to(device=self.device)

    def update_special_tokens(self, additional_special_tokens):
        current = self.tokenizer.special_tokens_map[self.add_tokens_key]
        self.tokenizer.add_special_tokens(
            {self.add_tokens_key: current + additional_special_tokens})

    def __call__(self, *args, **kwargs):
        return self.model(*args, **kwargs)

    def tokenize(self, text, masked_idxs=None):
        if isinstance(text, str):
            tokenized_text = self.tokenizer.tokenize(text)
            if masked_idxs is not None:
                for idx in masked_idxs:
                    tokenized_text[idx] = self.MASK
            tokenized = self.add_special_symbols(tokenized_text)
            return tokenized
        return list(map(self.tokenize, text))

    def add_special_symbols_bert(self, tokenized_text):
        return [self.CLS] + tokenized_text + [self.SEP]

    def add_special_symbols_roberta(self, tokenized_text):
        return [self.BOS] + tokenized_text + [self.EOS]

    def tokenize_sentence_pair(self, sent1, sent2):
        tokenized_sent1 = self.tokenizer.tokenize(sent1)
        tokenized_sent2 = self.tokenizer.tokenize(sent2)
        return self.add_special_symbols_sent_pair(
            tokenized_sent1, tokenized_sent2)

    def add_special_symbols_sent_pair(self, tokenized_sent1, tokenized_sent2):
        return ([self.CLS] + tokenized_sent1 + [self.SEP] +
                tokenized_sent2 + [self.SEP])

    def tokenize_to_ids(self,
                        text,
                        masked_idxs=None,
                        pad=True,
                        max_len=None,
                        clip_long_seq=False):
        tokens = self.tokenize(text, masked_idxs)
        return self.convert_tokens_to_ids(
            tokens, pad=pad, max_len=max_len, clip_long_seq=clip_long_seq)

    def tokenize_sentence_pair_to_ids(self, sent1, sent2):
        tokenized_sent1 = self.tokenizer.tokenize(sent1)
        segment1_len = len(tokenized_sent1) + self.nspecial_symbols_segment1
        tokenized_sent2 = self.tokenizer.tokenize(sent2)
        segment2_len = len(tokenized_sent2) + self.nspecial_symbols_segment2
        tokenized_sents = self.add_special_symbols(tokenized_sent1,
                                                   tokenized_sent2)
        padded_ids, padding_mask = self.convert_tokens_to_ids(tokenized_sents)
        segment_ids = self.segment_ids(segment1_len, segment2_len)
        return padded_ids, padding_mask, segment_ids

    def mask_mention_and_tokenize_context(self, collapse_mask, *, left_ctx,
                                          mention, right_ctx, **kwargs):
        left_ctx_tokenized = self.tokenize(left_ctx)[:-1]  # remove [SEP]
        if collapse_mask:
            masked_mention = [self.MASK]
        else:
            mention_tokenized = self.tokenize(mention)
            masked_mention = [self.MASK] * len(mention_tokenized)
        right_ctx_tokenized = self.tokenize(right_ctx)[1:]  # remove [CLS]
        tokens = left_ctx_tokenized + masked_mention + right_ctx_tokenized
        return tokens

    def mask_mention_and_tokenize_context_to_ids(self,
                                                 left_ctx,
                                                 mention,
                                                 right_ctx,
                                                 collapse_mask=True,
                                                 pad=True):
        tokens = self.mask_mention_and_tokenize_context(
            collapse_mask=collapse_mask,
            left_ctx=left_ctx,
            mention=mention,
            right_ctx=right_ctx)
        return tokens, self.convert_tokens_to_ids(tokens, pad=pad)

    def mask_mentions_and_tokenize_contexts_to_ids(self,
                                                   mentions_and_contexts,
                                                   collapse_mask=True):
        tokens = [
            self.mask_mention_and_tokenize_context(collapse_mask=collapse_mask,
                                                   **ment_ctx)
            for ment_ctx in mentions_and_contexts
        ]
        return tokens, self.convert_tokens_to_ids(tokens)

    def convert_tokens_to_ids(self,
                              tokens,
                              pad=True,
                              max_len=None,
                              clip_long_seq=False):
        max_len = max_len or self.max_len
        if not tokens:
            dummy = torch.tensor([]).to(device=self.device)
            if pad:
                return dummy.to(dtype=torch.long), dummy.to(dtype=torch.uint8)
            return dummy
        elif isinstance(tokens[0], list):
            token_idss = map(self.tokenizer.convert_tokens_to_ids, tokens)
            padded_ids = torch.zeros(
                (len(tokens, ), max_len), dtype=torch.long) + self.pad_idx
            for row_idx, token_ids in enumerate(token_idss):
                token_ids = torch.tensor(token_ids)
                if clip_long_seq:
                    token_ids = token_ids[:max_len]
                padded_ids[row_idx, :len(token_ids)] = token_ids
            padded_ids = padded_ids.to(device=self.device)
            mask = padded_ids != self.pad_idx
            return padded_ids, mask
        token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        ids = torch.tensor([token_ids]).to(device=self.device)
        if clip_long_seq:
            ids = ids[:, :max_len]
        else:
            assert ids.size(
                1) <= max_len, f'{ids.size(1)} > 50,124\n{len(tokens)} {tokens}'
        if pad:
            padded_ids = torch.zeros(1, max_len).to(ids) + self.pad_idx
            padded_ids[0, :ids.size(1)] = ids
            mask = torch.zeros(1, max_len).to(ids)
            mask[0, :ids.size(1)] = 1
            return padded_ids, mask
        else:
            return ids

    def subword_tokenize(self,
                         tokens,
                         mask_start_idx=None,
                         mask_end_idx=None,
                         add_mask_start_end_markers=False,
                         collapse_mask=True,
                         apply_mask=True,
                         add_special_symbols=True):
        """Segment each token into subwords while keeping track of token
        boundaries.

        Parameters
        ----------
        tokens: A sequence of strings, representing input tokens.

        Returns
        -------
        A tuple consisting of:
            - A list of subwords, flanked by the required special symbols.
            - An array of indices into the list of subwords, indicating
              that the corresponding subword is the start of a new token.
              For example, [1, 3, 4, 7] means that the subwords 1, 3, 4, 7
              are token starts, while all other subwords (0, 2, 5, 6, 8...)
              are in or at the end of tokens. This list allows selecting
              Bert hidden states that represent tokens, which is necessary
              in sequence labeling.
        """
        if mask_start_idx is not None:
            try:
                mask_starts = list(iter(mask_start_idx))
            except TypeError:
                mask_starts = [mask_start_idx]
            if mask_end_idx is None:
                assert len(mask_starts) == 1
                mask_ends = [mask_starts[0] + 1]
            else:
                try:
                    mask_ends = list(iter(mask_end_idx))
                except TypeError:
                    mask_ends = [mask_end_idx]
            mask_start_ends = list(reversed(list(zip(mask_starts, mask_ends))))
            if apply_mask:
                for mask_start, mask_end in mask_start_ends:
                    if collapse_mask:
                        mask_len = 1
                    else:
                        mention = ' '.join(tokens[mask_start:mask_end])
                        mention_subw = self.tokenize(mention)[1:-1]
                        mask_len = len(mention_subw)
                    tokens = (tokens[:mask_start] + [self.MASK] * mask_len +
                              tokens[mask_end:])
            if add_mask_start_end_markers:
                for mask_start, mask_end in mask_start_ends:
                    if apply_mask:
                        if collapse_mask:
                            mask_len = 1
                        else:
                            mention = ' '.join(tokens[mask_start:mask_end])
                            mention_subw = self.tokenize(mention)[1:-1]
                            mask_len = len(mention_subw)
                        mention = [self.MASK] * mask_len
                    else:
                        mention = tokens[mask_start:mask_end]
                    tokens = (tokens[:mask_start] + [self.BEGIN_MENTION] +
                              mention + [self.END_MENTION] + tokens[mask_end:])
                # account for inserted mention markers
                new_mask_starts = [
                    i for i, t in enumerate(tokens) if t == self.BEGIN_MENTION
                ]
                new_mask_ends = [
                    i + 1 for i, t in enumerate(tokens) if t == self.END_MENTION
                ]
                mask_start_ends = list(
                    reversed(list(zip(new_mask_starts, new_mask_ends))))
        subwords = list(map(self.tokenizer.tokenize, tokens))
        subword_lengths = list(map(len, subwords))
        subwords = list(flatten(subwords))
        if add_special_symbols:
            subwords = self.add_special_symbols(subwords)
            offset = 1  # + 1: assumes one special symbol is prepended to the input sequence
        else:
            offset = 0
        token_start_idxs = offset + np.cumsum([0] + subword_lengths[:-1])
        if mask_start_idx is not None:
            return subwords, token_start_idxs, mask_start_ends
        return subwords, token_start_idxs, None

    def subword_tokenize_to_ids(self,
                                tokens,
                                mask_start_idx=None,
                                mask_end_idx=None,
                                add_mask_start_end_markers=False,
                                collapse_mask=True,
                                apply_mask=True,
                                return_mask_mask=False,
                                return_mask_start_end=False,
                                max_len=None,
                                add_special_symbols=True):
        """Segment each token into subwords while keeping track of token
        boundaries and convert subwords into IDs.

        Parameters
        ----------
        tokens: A sequence of strings, representing input tokens.

        Returns
        -------
        A tuple consisting of:
            - A list of subword IDs, including IDs of the required special
              symbols.
            - A mask indicating padding tokens.
            - An array of indices into the list of subwords. See doc of
              subword_tokenize.
        """
        max_len = max_len or self.max_len
        subwords, token_start_idxs, mask_start_ends = self.subword_tokenize(
            tokens,
            mask_start_idx=mask_start_idx,
            mask_end_idx=mask_end_idx,
            add_mask_start_end_markers=add_mask_start_end_markers,
            collapse_mask=collapse_mask,
            apply_mask=apply_mask,
            add_special_symbols=add_special_symbols)
        subword_ids, padding_mask = self.convert_tokens_to_ids(subwords,
                                                               max_len=max_len)
        token_starts = torch.zeros(1, max_len).to(subword_ids)
        token_starts[0, token_start_idxs] = 1
        if return_mask_mask:
            mask_mask = torch.zeros(1, max_len).to(subword_ids)
            for mask_start, mask_end in mask_start_ends:
                token_mask_idxs = list(range(mask_start, mask_end))
                subw_mask_idxs = token_start_idxs[token_mask_idxs]
                mask_mask[0, subw_mask_idxs] = 1
            if return_mask_start_end:
                mask_start_end = torch.zeros(1, max_len).to(subword_ids)
                # this only works if there are fewer than seq_len // 2 masks
                for i, (mask_start, mask_end) in enumerate(mask_start_ends):
                    token_mask_idxs = list(range(mask_start, mask_end))
                    subw_mask_idxs = token_start_idxs[token_mask_idxs]
                    mask_start_end[0, 2 * i] = int(subw_mask_idxs[0])
                    mask_start_end[0, 2 * i + 1] = int(subw_mask_idxs[-1])
                return (subword_ids, padding_mask, token_starts, mask_mask,
                        mask_start_end)
            else:
                return subword_ids, padding_mask, token_starts, mask_mask
        return subword_ids, padding_mask, token_starts

    def segment_ids(self, segment1_len, segment2_len, pad=True, max_len=None):
        max_len = max_len or self.max_len
        npad = max_len - segment1_len - segment2_len
        ids = [0] * segment1_len + [1] * segment2_len + [0] * npad
        assert len(ids) == max_len
        return torch.tensor([ids]).to(device=self.device)
def _intdef_ids():
    intdef_ids = Bunch([(x, i) for i, x in enumerate(
        lines(INTDEFS_CSV)) if x.strip()])
    return intdef_ids
def _register_id_map():
    return Bunch([(x, i) for i, x in enumerate(lines(REGISTERS_CSV))])
import argparse
import os
import util
import subprocess
import file_management

plp = file_management.get_playlists_path()
sgp = file_management.get_songs_path()

parser = argparse.ArgumentParser()
parser.add_argument('-p', '--playlist', type=str, metavar='<TITLE>')
args = parser.parse_args()
playlist = args.playlist

aspath = os.path.join(plp, f'{playlist}.playlist')
with open(aspath) as f:
    for line in util.lines(f):
        song_dest = os.path.join(sgp, f'{line}.m4a')
        # subprocess.run(['ffplay', '-nodisp', '-nostats', '-hide_banner', song_dest])
        subprocess.run(['afplay', song_dest])
    sub_vecs = create_representation(
        "SVD", constants.SUBREDDIT_EMBEDDINGS.format(name))
    pos_seeds, neg_seeds = seeds.twitter_seeds()
    sub_vecs = sub_vecs.get_subembed(
        set(word_dict.token2id.keys()).union(pos_seeds).union(neg_seeds))
    pols = polarity_induction_methods.bootstrap(sub_vecs, pos_seeds, neg_seeds,
                                                return_all=True, nn=25, beta=0.9,
                                                num_boots=50, n_procs=10)
    util.write_pickle(pols, POLARITIES + name + ".pkl")

if __name__ == "__main__":
    queue = Queue()
    id = int(sys.argv[1])
    valid_ids = set(range(250, 256))
    for i, line in enumerate(util.lines(NAMES)):
        if i in valid_ids:
            name = line.split()[0]
            queue.put(name)
    print queue.qsize()
    procs = [Process(target=worker, args=[i, queue]) for i in range(1)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()