def test_sum_from(): """ Tests the sum_from function. """ print() print('--------------------------------------------------') print('Testing the sum_from function:') print('--------------------------------------------------') # These first two tests use an ORACLE for testing. # The oracle here is the built-in sum function. actual_answer = sum_from(6, 9) oracle_answer = builtins.sum(range(6, 10)) test_case = 'sum_from(6, 9). Actual, Oracle answers:' print(' Called ', test_case, actual_answer, oracle_answer) actual_answer = sum_from(100, 10000) oracle_answer = builtins.sum(range(100, 10001)) test_case = 'sum_from(100, 10000). Actual, Oracle answers:' print(' Called ', test_case, actual_answer, oracle_answer) # This test uses a KNOWN answer # (Everyone "knows" that the sum from 0 to 0 is 0.) actual_answer = sum_from(0, 0) known_answer = 0 test_case = 'sum_from(0, 0). Actual, Known answers:' print(' Called ', test_case, actual_answer, known_answer) # This test uses a FORMULA answer # (which is a kind of ORACLE answer) that uses the formula: # m + (m+1) + (m+2) + ... + n = (m + n) * (n - m + 1) / 2 actual_answer = sum_from(53, 4999) formula_answer = (53 + 4999) * (4999 - 53 + 1) // 2 test_case = 'sum_from(53, 4999). Actual, Formula answers:' print(' Called ', test_case, actual_answer, formula_answer)
def test_sum_from(): """ Tests the sum_from function. """ print() print('--------------------------------------------------') print('Testing the sum_from function:') print('--------------------------------------------------') # ------------------------------------------------------------------ # These first two tests use an ORACLE for testing, # that is, a way to get the answer by using some other approach # that is known to work correctly. # The oracle here is the builtins.sum function. # ------------------------------------------------------------------ # Test 1: answer_from_oracle = builtins.sum(range(6, 10)) answer_from_my_code = sum_from(6, 9) print('Test 1 expected (from oracle):', answer_from_oracle) print(' actual (from my code): ', answer_from_my_code) # Test 2: answer_from_oracle = builtins.sum(range(100, 10001)) answer_from_my_code = sum_from(100, 10000) print('Test 2 expected (from oracle):', answer_from_oracle) print(' actual (from my code): ', answer_from_my_code) # ------------------------------------------------------------------ # The next test uses a KNOWN answer (usually computed by hand). # (Everyone "knows" that the sum from 0 to 3 is 0+1+2+3, i.e. 6.) # ------------------------------------------------------------------ # Test 3: answer_from_by_hand = 6 answer_from_my_code = sum_from(0, 3) print('Test 3 expected (from by-hand):', answer_from_by_hand) print(' actual (from my code): ', answer_from_my_code) # ------------------------------------------------------------------ # The next test uses a FORMULA answer (which is one kind of ORACLE answer) # that uses the formula: # m + (m+1) + (m+2) + ... + n = (m + n) * (n - m + 1) / 2 # ------------------------------------------------------------------ # Test 4: answer_from_formula = (53 + 4999) * (4999 - 53 + 1) // 2 answer_from_my_code = sum_from(53, 4999) print('Test 4 expected (from formula):', answer_from_formula) print(' actual (from my code): ', answer_from_my_code)
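# The sum_from function under test is not shown in these snippets; a minimal,
# hypothetical implementation consistent with the tests above (summing the
# integers from m through n, inclusive) could look like this:
def sum_from(m, n):
    total = 0
    for k in range(m, n + 1):
        total += k
    return total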
def leaderboard_sequence(spectrum, n, alphabet):
    spectrum = sorted(spectrum)
    parent_mass = max(spectrum)
    leader_board = [[]]
    leader_peptide = []
    # Track the best score seen so far across all iterations, so the leader
    # is only replaced by a strictly better-scoring peptide.
    leader_score = 0
    while len(leader_board) > 0:
        leader_board = expand(leader_board, alphabet)
        # Iterate over a copy so peptides can be removed from leader_board.
        temp = leader_board[:]
        for peptide in temp:
            mass = sum(peptide)
            if mass == parent_mass:
                s = cyc_score(peptide, spectrum)
                if s > leader_score:
                    leader_peptide = peptide
                    leader_score = s
            elif mass > parent_mass:
                leader_board.remove(peptide)
        leader_board = trim(leader_board, spectrum, n)
    return leader_peptide
def logp_partial_gradient(self, variable, calculation_set=None):
    """
    Calculates the partial gradient of the posterior of self with respect to variable.
    Returns zero if self is not in calculation_set.
    """
    if (calculation_set is None) or (self in calculation_set):
        if not datatypes.is_continuous(variable):
            return zeros(shape(variable.value))

        if variable is self:
            try:
                gradient_func = self._logp_partial_gradients['value']
            except KeyError:
                raise NotImplementedError(
                    repr(self) + " has no gradient function for 'value'")

            gradient = np.reshape(gradient_func.get(), np.shape(variable.value))
        else:
            gradient = builtins.sum(
                [self._pgradient(variable, parameter, value)
                 for parameter, value in six.iteritems(self.parents)])

        return gradient
    else:
        return 0
def logp_partial_gradient(self, variable, calculation_set=None):
    """
    Gets the logp gradient of this deterministic with respect to variable.
    """
    if self.verbose > 0:
        print_('\t' + self.__name__ + ': logp_partial_gradient accessed.')

    if not (datatypes.is_continuous(variable) and datatypes.is_continuous(self)):
        return zeros(shape(variable.value))

    # loop through all the parameters and add up all the gradients of log p
    # with respect to the appropriate variable
    gradient = builtins.sum(
        [child.logp_partial_gradient(self, calculation_set)
         for child in self.children])

    totalGradient = 0
    for parameter, value in six.iteritems(self.parents):
        if value is variable:
            totalGradient += self.apply_jacobian(parameter, variable, gradient)

    return np.reshape(totalGradient, shape(variable.value))
def logp_gradient_contribution(self, calculation_set=None):
    """
    Calculates the gradient of the joint log posterior with respect to self.
    Calculation of the log posterior is restricted to the variables in calculation_set.
    """
    # NEED some sort of check to see if the log p calculation has recently
    # failed, in which case not to continue.
    return self.logp_partial_gradient(self, calculation_set) + builtins.sum(
        [child.logp_partial_gradient(self, calculation_set)
         for child in self.children])
def biased_random_selector(pdf):
    """Returns an index into pdf, chosen with probability proportional to pdf[i]."""
    total = sum(pdf)
    r = random.uniform(0, total)
    running_total = 0.0
    for i in range(len(pdf)):
        running_total += pdf[i]
        # Use >= so the boundary case r == total (possible with random.uniform)
        # still selects an entry instead of falling through to the exception.
        if running_total >= r:
            return i
    raise Exception("Something's wrong, didn't manage to find a value in random selector")
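# A quick illustrative check of the selector (the weights and draw count are
# examples only, not from the original code): with weights 1, 2 and 7, index 2
# should come back roughly 70% of the time over many draws.
from collections import Counter

counts = Counter(biased_random_selector([1.0, 2.0, 7.0]) for _ in range(10000))
print(counts)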
def sum(*args):
    """Override the builtin sum function to handle multiple arguments.

    Arguments:
    *args -- lists of numbers or individual numbers
    """
    fullList = []
    for arg in args:
        if hasattr(arg, 'extend'):
            fullList.extend(arg)
        else:
            fullList.append(arg)
    return builtins.sum(fullList)
def mean(*args):
    """Added function to calculate the arithmetic average.

    Arguments:
    *args -- lists of numbers or individual numbers
    """
    fullList = []
    for arg in args:
        if hasattr(arg, 'extend'):
            fullList.extend(arg)
        else:
            fullList.append(arg)
    if not fullList:
        return 0
    return builtins.sum(fullList) / len(fullList)
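# Illustrative calls for the sum/mean wrappers above (these examples are not
# from the original code): lists and scalars may be mixed freely, and
# everything is flattened into one list before reducing.
print(sum([1, 2, 3], 4, [5]))   # 15
print(mean([2, 4], 6))          # 4.0
print(mean())                   # 0 -- the empty case is handled explicitly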
mel = expr.select(**ut.sel_startswith('melXmel_')) sim = expr.select(**ut.sel_startswith('simXsim_')) hyb = expr.select(**ut.sel_startswith(('melXsim', 'simXmel'))) expr_in_mel = (mel.max(axis=1) > EXPR_MIN) expr_in_sim = sim.max(axis=1) > EXPR_MIN expr_in_hybrids = (hyb.max(axis=1) > EXPR_MIN) expr_in_all = (expr_in_mel & expr_in_sim & expr_in_hybrids) expr = expr.ix[expr_in_all] embryo_types = {c.split('_sl')[0].split('_rep')[0] for c in expr.columns} embryos = {} for etype in embryo_types: embryos[etype] = { c.split('_sl')[0] for c in expr.columns if c.startswith(etype) } combs = sum([sorted(it.combinations(e, 2)) for e in embryos.values()], []) combs += list( it.product(embryos['melXsim_cyc14C'], embryos['simXmel_cyc14C'])) emds = pd.DataFrame(index=expr.index, columns=["{}-{}".format(*c) for c in combs], data=-1) for gene in pb()(expr.index): for e1, e2 in combs: emds.ix[gene, "{}-{}".format(e1, e2)] = (dd.earth_mover_multi_rep( expr.ix[gene].select(ut.startswith(e1)) + EXPR_MIN, expr.ix[gene].select(ut.startswith(e2)) + EXPR_MIN, ))
def get_rating_average_and_count(pk):
    ratings = Review.objects.filter(mechanic=pk).values_list('rating', flat=True)
    count = len(ratings)
    if count == 0:
        # No reviews yet: report 0% with a count of 0 instead of dividing by zero.
        return [0, 0]
    # Average rating expressed as a percentage of the 5-star maximum.
    return [int(sum(ratings) / count / 5 * 100), count]
def EfficientNet(width_coefficient, depth_coefficient, default_size, dropout_rate=0.2, drop_connect_rate=0.2, depth_divisor=8, model_name='efficientnet', include_top=True, num_classes=1000, **kwargs): """Instantiates the EfficientNet architecture using given scaling coefficients. Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. Args width_coefficient: float, scaling coefficient for network width. depth_coefficient: float, scaling coefficient for network depth. default_size: integer, default input image size. dropout_rate: float, dropout rate before final classifier layer. drop_connect_rate: float, dropout rate at skip connections. depth_divisor: integer, a unit of network width. activation_fn: activation function. model_name: string, model name. include_top: whether to include the fully-connected layer at the top of the network. input_shape: optional shape tuple, only to be specified if `include_top` is False. It should have exactly 3 inputs channels. num_classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Returns A Efficientnet model instance. """ default_block_args = deepcopy(DEFAULT_BLOCKS_ARGS) def round_filters(filters, divisor=depth_divisor): """Round number of filters based on depth multiplier.""" filters *= width_coefficient new_filters = builtins.max( divisor, int(filters + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. if new_filters < 0.9 * filters: new_filters += divisor return int(new_filters) def round_repeats(repeats): """Round number of repeats based on depth multiplier.""" return int(math.ceil(depth_coefficient * repeats)) flow_list = [] efficientnet = Sequential(name=model_name) efficientnet.add_module( 'stem', Conv2d_Block((3, 3), round_filters(32), strides=2, use_bias=False, auto_pad=True, padding_mode='zero', normalization='batch', activation='swish', name='stem')) b = 0 blocks = float(builtins.sum(args['repeats'] for args in default_block_args)) for (i, args) in enumerate(default_block_args): assert args['repeats'] > 0 # Update block input and output filters based on depth multiplier. # args['filters_in'] = round_filters(args['filters_in']) # args['filters_out'] = round_filters(args['filters_out']) for j in range(round_repeats(args.pop('repeats'))): # The first block needs to take care of stride and filter size increase. 
if j > 0: args['strides'] = 1 args['filters_in'] = args['filters_out'] efficientnet.add_module( 'block{}{}'.format(i + 1, chr(j + 97)), efficient_block(expand_ratio=args['expand_ratio'], filters_in=round_filters(args['filters_in']), filters_out=round_filters(args['filters_out']), kernel_size=args['kernel_size'], strides=args['strides'], zero_pad=0, se_ratio=args['se_ratio'], drop_connect_rate=drop_connect_rate * b / blocks, name='block{}{}_'.format(i + 1, chr(j + 97)))), b += 1 efficientnet.add_module( 'top_conv', Conv2d_Block((1, 1), round_filters(1280), strides=1, use_bias=False, auto_pad=True, padding_mode='zero', normalization='batch', activation='swish', name='top_conv')) efficientnet.add_module('avg_pool', GlobalAvgPool2d(name='avg_pool')) if include_top: if dropout_rate > 0: efficientnet.add_module('top_dropout', Dropout(dropout_rate, name='top_dropout')) efficientnet.add_module('fc', Dense(num_classes, activation=None, name='fc')) efficientnet.add_module('softmax', SoftMax(axis=-1, name='softmax')) if isinstance(default_size, int): default_size = (default_size, default_size, 3) elif len(default_size) == 1: default_size = (default_size[0], default_size[0], 3) model = ImageClassificationModel(input_shape=default_size, output=efficientnet) with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'imagenet_labels1.txt'), 'r', encoding='utf-8-sig') as f: labels = [l.rstrip() for l in f] model.class_names = labels model.preprocess_flow = [ Resize((default_size[0], default_size[1]), keep_aspect=True), Normalize(0, 255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ] return model
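# A standalone sketch of the filter-rounding rule used by round_filters above,
# with made-up width coefficients (not values taken from the snippet): widths
# are scaled, then snapped to a multiple of the divisor without dropping more
# than 10% below the scaled value.
def round_filters_demo(filters, width_coefficient, divisor=8):
    filters *= width_coefficient
    new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
    if new_filters < 0.9 * filters:
        new_filters += divisor
    return int(new_filters)

print(round_filters_demo(32, 1.1))  # 35.2 snaps back down to 32 (still within 10%)
print(round_filters_demo(32, 1.2))  # 38.4 rounds up to 40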
def checkAndCountPrevWorldNumberOfCells(fName):
    if not os.path.isfile(fName):
        return 0
    with open(fName) as f:
        return builtins.sum(1 for _ in f)
def cross(A, B):
    return [a + b for a in A for b in B]


digits = '123456789'
rows = 'ABCDEFGHI'
cols = digits
squares = cross(rows, cols)
unit_list = ([cross(rows, c) for c in cols] +
             [cross(r, cols) for r in rows] +
             [cross(rs, cs) for rs in ('ABC', 'DEF', 'GHI')
              for cs in ('123', '456', '789')])
units = dict((s, [u for u in unit_list if s in u]) for s in squares)
peers = dict((s, set(sum(units[s], [])) - set([s])) for s in squares)


def test():
    """A set of unit tests"""
    assert len(squares) == 81
    assert len(unit_list) == 27
    assert all(len(units[s]) == 3 for s in squares)
    assert all(len(peers[s]) == 20 for s in peers)
    assert units['C2'] == [['A2', 'B2', 'C2', 'D2', 'E2', 'F2', 'G2', 'H2', 'I2'],
                           ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9'],
                           ['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'C1', 'C2', 'C3']]
    assert peers['C2'] == set([
def _concatenate_shape(tensor, combine_block): return tuple(builtins.sum(nsplit[i] for i in cb) for nsplit, cb in zip(tensor.nsplits, combine_block))
def LHSSimilarpairs(l1, l2, n):
    # 1 where the characters at matching positions of each paired string agree, else 0.
    L3 = [1 if x == y else 0 for s1, s2 in zip(l1, l2) for x, y in zip(s1, s2)]
    # Sum the matches within each band of length n.
    L4 = [builtins.sum(L3[i:i + n]) for i in range(0, len(L3), n)]
    # Count the bands in which every position matched.
    LSH = len([x for x in L4 if x >= n])
    return LSH
def sum(*args):
    if len(args) == 0:
        return bpipe(sum)
    else:
        return builtins.sum(*args)
def multi_slides(pattern=example_pattern,
                 right_down_pairs=[(1, 1), (3, 1), (5, 1), (7, 1), (1, 2)],
                 output_func=lambda x: x == '#'):
    for right, down in right_down_pairs:
        yield sum(slide(pattern, right, down, output_func))
from itertools import cycle
from builtins import sum


def slide(pattern=example_pattern, right=3, down=1, output_func=lambda x: x == '#'):
    pattern_rows = pattern.split('\n')
    nrows = len(pattern_rows)
    ncols = len(pattern_rows[0])
    row, col = down, right
    while row < nrows:
        yield output_func(pattern_rows[row][col % ncols])
        row += down
        col += right


assert sum(slide(example_pattern)) == 7


def multi_slides(pattern=example_pattern,
                 right_down_pairs=[(1, 1), (3, 1), (5, 1), (7, 1), (1, 2)],
                 output_func=lambda x: x == '#'):
    for right, down in right_down_pairs:
        yield sum(slide(pattern, right, down, output_func))


import math

assert math.prod(multi_slides()) == 336
def sum(x:(N**T1)[pylist]) -> pyfloat: return builtins.sum(x._v)
def sum(x): return builtins.sum(x)
def rankingKey(x): return builtins.sum(abs(a - b) for a, b in zip(x, origOffsets))
def run_test_sum_first_n(): """ Tests the sum_first_n function. """ # ------------------------------------------------------------------ # DONE: 8. Implement this TEST function. # It TESTS the sum_first_n function defined below. # Include at least ** 2 ** ADDITIONAL tests. # # As usual, include both EXPECTED and ACTUAL results in your test # and compute the latter BY HAND (not by running your program). # ------------------------------------------------------------------ print() print('--------------------------------------------------') print('Testing the sum_first_n function:') print('--------------------------------------------------') # Test 1: expected = 0 actual = sum_first_n([48, -10, 50, 5], 0) print() print('Test 1 expected:', expected) print(' actual: ', actual) # Test 2: expected = 48 actual = sum_first_n([48, -10, 50, 5], 1) print() print('Test 2 expected:', expected) print(' actual: ', actual) # Test 3: expected = 38 actual = sum_first_n([48, -10, 50, 5], 2) print() print('Test 3 expected:', expected) print(' actual: ', actual) # Test 4: expected = 88 actual = sum_first_n([48, -10, 50, 5], 3) print() print('Test 4 expected:', expected) print(' actual: ', actual) # Test 5: expected = 93 actual = sum_first_n([48, -10, 50, 5], 4) print() print('Test 5 expected:', expected) print(' actual: ', actual) # Test 6: This test uses a RANDOMLY generated sequence # and an ORACLE to determine the expected (correct) result. sequence = [] for _ in range(10000): sequence.append(random.randrange(-100, 100)) expected = builtins.sum(sequence[:-1]) actual = sum_first_n(sequence, 9999) print() print('Test 6 expected:', expected) print(' actual: ', actual) # Test 7: This test uses a RANDOMLY generated sequence # and an ORACLE to determine the expected (correct) result. sequence = [] for _ in range(10000): sequence.append(random.randrange(-100, 100)) expected = builtins.sum(sequence[:-4000]) actual = sum_first_n(sequence, 6000) print() print('Test 7 expected:', expected) print(' actual: ', actual) # TO DO 8 (continued): Add your 2 ADDITIONAL tests here: # Test 8: expected = 90 actual = sum_first_n([30, 40, 50, -30], 4) print() print('Test 8 expected:', expected) print(' actual: ', actual) # Test 9: expected = -30 actual = sum_first_n([-10, -20, 30, 10], 2) print() print('Test 9 expected:', expected) print(' actual: ', actual)
def _get_offset(tensor, axis, chunk, ravel):
    nsplits = tensor.nsplits
    offset = tuple(builtins.sum(split[:idx])
                   for split, idx in zip(nsplits, chunk.index))
    if not ravel:
        offset = offset[axis[0]]
    return offset
def run_test_sum_sequence():
    """ Tests the sum_sequence function. """
    print()
    print('--------------------------------------------------')
    print('Testing the sum_sequence function:')
    print('--------------------------------------------------')
    # -------------------------------------------------------------------------
    # DONE: 2. READ the COMMENTS and CODE in this function,
    #    asking questions as needed.
    #
    #    When you believe that you understand:
    #      -- What an ORACLE is
    #      -- How one can generate and use RANDOM test cases
    #      -- How one can test using PROBABILITY THEORY
    #    then:
    #      change the above TO DO to DONE.
    # -------------------------------------------------------------------------

    # -------------------------------------------------------------------------
    # Here (below) are examples of using an ORACLE for testing,
    # that is, using a separate way of obtaining the correct answer as if
    # by "magic".  The oracle here is the built-in sum function.
    # We provided two tests that use that oracle.
    #
    # BTW, google for "Oracle of Delphi" if you are curious about
    # why we call such tests "oracles".
    # -------------------------------------------------------------------------

    # -------------------------------------------------------------------------
    # Test 1 (using an ORACLE to compute the expected answer):
    # -------------------------------------------------------------------------
    sequence1 = [48, -10, 100, 9939309808, 433443080, -45634930]
    oracle_answer = builtins.sum(sequence1)
    actual_answer = sum_sequence(sequence1)
    print()
    print('Test 1: Using the sequence:')
    print(' ', sequence1)
    print(' Expected (oracle) result: ', oracle_answer)
    print(' Actual result: ', actual_answer)

    # -------------------------------------------------------------------------
    # Test 2 (using an ORACLE to compute the expected answer):
    # -------------------------------------------------------------------------
    sequence2 = [48, 180, -475, 205, 88]
    oracle_answer = builtins.sum(sequence2)
    actual_answer = sum_sequence(sequence2)
    print()
    print('Test 2: Using the sequence:')
    print(' ', sequence2)
    print(' Expected (oracle) result: ', oracle_answer)
    print(' Actual result: ', actual_answer)

    # -------------------------------------------------------------------------
    # Test 3 (using an ORACLE to compute the expected answer):
    #
    # This test uses a RANDOMLY generated sequence,
    # so every time you run the program it does a DIFFERENT test!
    # So this code snippet can be used to do MANY tests!
    # -------------------------------------------------------------------------
    # The next few lines make a sequence of 10,000 RANDOM numbers:
    sequence3 = []
    for _ in range(10000):
        sequence3.append(random.randrange(-10, 11))
    oracle_answer = builtins.sum(sequence3)
    actual_answer = sum_sequence(sequence3)
    print()
    print('Test 3: Using the following RANDOMLY generated sequence:')
    print(' ', sequence3)
    print(' Expected (oracle) result: ', oracle_answer)
    print(' Actual result: ', actual_answer)

    # -------------------------------------------------------------------------
    # Tests 4 and 5: using a KNOWN answer
    # (here, ones easily computed by hand).
    #
    # Test 5 is an example of BOUNDARY (aka EDGE) testing, which is:
    #
    #     Where test cases are generated using the EXTREMES of the
    #     input domain, e.g. maximum, minimum, just inside/outside
    #     boundaries, error values.  It focuses on "corner cases".
    #
    # The above quotation is a slight paraphrase from the Wikipedia
    # article at https://en.wikipedia.org/wiki/Boundary_testing.
    #
    # -------------------------------------------------------------------------
    # Test 4:
    sequence4 = [48, -10]
    known_answer = 38
    actual_answer = sum_sequence(sequence4)
    print()
    print('Test 4: Using the sequence:')
    print(' ', sequence4)
    print(' Expected (known) result: ', known_answer)
    print(' Actual result: ', actual_answer)

    # Test 5:
    sequence5 = []
    known_answer = 0
    actual_answer = sum_sequence(sequence5)
    print()
    print('Test 5: Using the sequence:')
    print(' ', sequence5)
    print(' Expected (known) result: ', known_answer)
    print(' Actual result: ', actual_answer)

    # -------------------------------------------------------------------------
    # Test 6: (Don't worry if you don't follow this example fully.)
    #
    # Like Test 3, this test uses a RANDOMLY generated sequence.
    #
    # But unlike Test 3 (which used an ORACLE),
    # THIS example uses PROBABILITY THEORY to predict (approximately)
    # the expected value.
    #
    # It relies on what is called the
    #     Law of Large Numbers
    # which, as applied here, says:
    #     If you compute the average of a lot of numbers, with each
    #     number drawn RANDOMLY from -10 to 10 (inclusive),
    #     the result should be close to the average of the numbers
    #     from -10 to 10 (inclusive) [which is 0].
    #
    # See https://en.wikipedia.org/wiki/Law_of_large_numbers
    # for a not-too-clear explanation of the Law of Large Numbers.
    # -------------------------------------------------------------------------
    # Skip this test if sum_sequence has not yet been implemented:
    if sum_sequence([1, 2, 3]) is None:
        return

    sequence6 = []
    # The next lines make a sequence of 10000 RANDOM numbers:
    for _ in range(10000):
        sequence6.append(random.randrange(-10, 11))
    expected_sum_from_probability_theory = 0
    expected_average_from_probability_theory = 0
    actual_sum = sum_sequence(sequence6)
    actual_average = actual_sum / 10000
    print()
    print('Test 6: Using the following RANDOMLY generated sequence:')
    print(' ', sequence6)
    print(' Expected results (from PROBABILITY THEORY):')
    print(' Sum: ', expected_sum_from_probability_theory)
    print(' Average: ', expected_average_from_probability_theory)
    print(' ACTUAL results (should be CLOSE to the above)')
    print(' Sum: ', actual_sum)
    print(' Average: ', actual_average)
    print(' where "close" for the sum means absolute value < about 600')
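# Why "about 600"?  A quick back-of-the-envelope check (the numbers below are
# standard probability facts, not part of the original exercise): one draw from
# randrange(-10, 11) has mean 0 and variance (21**2 - 1) / 12, roughly 36.67,
# so the sum of 10,000 independent draws has standard deviation of about 606.
# A tolerance of roughly 600 on the sum (0.06 on the average) is therefore
# about one standard deviation.
import math

variance_single = (21 ** 2 - 1) / 12            # variance of a uniform integer on [-10, 10]
std_of_sum = math.sqrt(10000 * variance_single)
print(variance_single, std_of_sum)              # ~36.67, ~605.5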
def calc_sum(self): return builtins.sum(map(self.sample_func, self.aslist()))
def sum(xs): return builtins.sum(xs)
def _partial_reduction(cls, agg_op_type, tensor, axis, dtype, keepdims, combine_size, kw=None): from ..merge.concatenate import TensorConcatenate kw = kw or {} axes = sorted(combine_size.keys()) combine_blocks = [ cls._combine_split(i, combine_size, tensor.chunk_shape) for i in range(tensor.ndim) ] combine_blocks_idxes = [ range(len(blocks)) for blocks in combine_blocks ] chunks = [] for combine_block_idx, combine_block in zip( itertools.product(*combine_blocks_idxes), itertools.product(*combine_blocks)): chks = [ tensor.cix[idx] for idx in itertools.product(*combine_block) ] if len(chks) > 1: op = TensorConcatenate(axis=axes, dtype=chks[0].dtype) chk = op.new_chunk(chks, shape=cls._concatenate_shape( tensor, combine_block), order=tensor.order) else: chk = chks[0] shape = tuple(s if i not in combine_size else 1 for i, s in enumerate(chk.shape) if keepdims or i not in combine_size) agg_op = agg_op_type(axis=axis, dtype=dtype, keepdims=keepdims, **kw) chunk = agg_op.new_chunk( [chk], shape=shape, index=tuple(idx for i, idx in enumerate(combine_block_idx) if keepdims or i not in combine_size), order=tensor.order) chunks.append(chunk) nsplits = [ tuple(c.shape[i] for c in chunks if builtins.all(idx == 0 for j, idx in enumerate(c.index) if j != i)) for i in range(len(chunks[0].shape)) ] shape = tuple(builtins.sum(nsplit) for nsplit in nsplits) agg_op = agg_op_type(axis=axis, dtype=dtype, keepdims=keepdims, combine_size=combine_size, **kw) return agg_op.new_tensors([tensor], shape, order=tensor.order, chunks=chunks, nsplits=nsplits)
def cal_mdd(self, tok, head, pos):
    total_mdd = 0
    for t, h, p in zip(tok, head, pos):
        # remove punct from the head & sentence lists
        punct_indices = [i for i, x in enumerate(p) if x == "PUNCT"]
        head_wo_punct = [
            i for j, i in enumerate(h) if j not in punct_indices
        ]
        sent_wo_punct = [
            i for j, i in enumerate(t) if j not in punct_indices
        ]
        new_head = []
        # j is the old id of the head (word index + 1); i is the word index
        for i, j in enumerate(head_wo_punct):
            if j == 0:
                head_word = t[i]
            else:
                try:
                    # find the original head word, using its old id to index the original sentence
                    head_word = t[j - 1]
                    # the new id is index + 1, matching the old head-id convention
                    new_word_id = sent_wo_punct.index(head_word) + 1
                    new_head.append(new_word_id)
                except:
                    # This word's head is a punctuation token that was removed;
                    # punct should not be included when calculating MDD, so the
                    # relation is discarded.
                    pass
        new_id = [i for i in range(1, len(new_head) + 1)]
        try:
            sent_mdd = sum([abs(i - j) for i, j in zip(new_id, new_head)]) / (len(sent_wo_punct) - 1)
        except ZeroDivisionError:
            sent_mdd = 0
        # Accumulate the per-sentence MDD across all sentences before averaging.
        total_mdd += sent_mdd
    final_mdd = total_mdd / len(tok)
    return final_mdd
def sum(iterable, *args):
    '''Replacement for the built-in :func:`sum() <python:sum>` function.'''
    return builtins.sum(iterable, *args)
def go(arg): if arg.seed < 0: seed = random.randint(0, 1000000) print('random seed: ', seed) else: torch.manual_seed(arg.seed) tbw = SummaryWriter(log_dir=arg.tb_dir) # Tensorboard logging # load data if arg.task == 'coco': with open(arg.data + os.sep + 'i2cat_train2017.json') as file: i2cat = json.load(file) with open(arg.data + os.sep + 'i2cap_train2017.json') as file: i2cap = json.load(file) with open(arg.data + os.sep + 'labels.json') as file: l2i = json.load(file) i2l = {v:k for k, v in l2i.items()} if arg.final: raise Exception('Not implemented yet.') else: images = list(i2cat.keys()) images_train = images[:-VAL] images_valid = images[-VAL:] cats_train, cats_valid = [], [] caps_train, caps_valid = [], [] # transform to caption -> categories for image in images_train: caps = i2cap[image] cats = i2cat[image] caps_train.extend(caps) cats_train.extend([cats] * len(caps)) for image in images_valid: caps = i2cap[image] cats = i2cat[image] caps_valid.extend(caps) cats_valid.extend([cats] * len(caps)) # sort by length of caption pairs = zip(caps_train, cats_train) caps_train, cats_train = zip(*sorted(pairs, key=lambda x : len(x[0]))) pairs = zip(caps_valid, cats_valid) caps_valid, cats_valid = zip(*sorted(pairs, key=lambda x : len(x[0]))) ntrain, nvalid = len(images_train), len(images_valid) max_cat = 90 if arg.task == 'imdb': l2i = {'pos':1, 'neg':0} i2l = {v: k for k, v in l2i.items()} with gzip.open(f'{here()}{os.sep}data{os.sep}imdb{os.sep}imdb.train.json.gz', 'r') as file: train = json.load(file) with gzip.open(f'{here()}{os.sep}data{os.sep}imdb{os.sep}imdb.test.json.gz', 'r') as file: test = json.load(file) caps_train = train['pos'] + train['neg'] cats_train = [[1]] * len(train['pos']) + [[0]] * len(train['neg']) pairs = zip(caps_train, cats_train) caps_train, cats_train = zip(*sorted(pairs, key=lambda x: len(x[0]))) ntrain, _ = len(caps_train), None max_cat = 1 # TODO split train into train/val, load test properly else: raise Exception(f'Task {arg.task} not recognized.') if arg.max_length is not None: caps_train = [s[:arg.max_length] for s in caps_train] # create the model model = GPT2Wrapper(iblocks=arg.iblocks, gptname=arg.gpt_name, csize=max_cat+1) if torch.cuda.is_available(): model.to('cuda') model.model.mod[0].to('cuda') model.tokenizer.padding_side = 'right' opt = torch.optim.Adam(lr=arg.lr, params=model.parameters()) seen = 0 for e in range(arg.epochs): if e % arg.print_every == 0: # Generate some random sequences for i in range(arg.nrandom): # generate a random category random_cat = random.choice(list(l2i.keys())) cats = torch.zeros(1, max_cat + 1) cats[0, l2i[random_cat]] = 1.0 # generate and print some random text seed = START input = torch.tensor(model.tokenizer.encode(seed)) if torch.cuda.is_available(): input, cats = input.to('cuda'), cats.to('cuda') outseq = [] for _ in range(arg.print_size): output = model(input[None, :], cond=cats) c = sample(output[0, -1, :], arg.sampling_temp) outseq.append(c) if c == model.tokenizer.bos_token_id: break input = torch.cat([input, c], dim=0) outseq = torch.cat(outseq, dim=0) outseq = model.tokenizer.decode(outseq) with open(f'random.e{e:03}i{i:02}.txt', 'w') as file: print('chosen category', random_cat, file=file) print('---------------------------------------------', file=file) print(seed, file=file) print(outseq, flush=True, file=file) pbar = tqdm.tqdm(total=len(caps_train)) fr = 0 while fr < ntrain: if arg.batch_char is None: # -- fixed nr of sequences per batch to = min(fr + arg.batch, ntrain) else: sum, to = 0, fr while sum < 
arg.batch_char and to < len(caps_train): sum += len(caps_train[to]) to += 1 bcats = cats_train[fr:to] bcaps = caps_train[fr:to] if arg.limit is not None and seen > arg.limit: break # print('length of sequences in batch', [len(s) for s in bcaps]) # print('-- total', builtins.sum([len(s) for s in bcaps]), len(bcaps)) # translate captions to tensors res = model.tokenizer.batch_encode_plus(bcaps, pad_to_max_length=True, max_length=max([len(s) for s in bcaps])) captions = res['input_ids'] pad_sequences(captions, token=model.tokenizer.pad_token_id, max_length=model.ctx-1) captions = torch.tensor(captions) b, t = captions.size() seen += b bos, pad = torch.tensor([[model.tokenizer.bos_token_id]]), torch.tensor([[model.tokenizer.bos_token_id]]) source = torch.cat([bos.expand(b, 1), captions], dim=1) target = torch.cat([captions, pad.expand(b, 1)], dim=1) # -- target is the same sequence as source, except one character ahead if arg.dropout > 0.0: source = source * torch.empty(source.size(1)).bernoulli_(arg.dropout).to(torch.long)[None, :] # token dropout # translate categories to n-hots cats = onehot(bcats, max_cat=max_cat) if torch.cuda.is_available(): source, target, cats = source.to('cuda'), target.to('cuda'), cats.to('cuda') try: output = model(source, cond=cats) except Exception as e: print('length of sequences in batch', [len(s) for s in bcaps]) print('-- total', builtins.sum([len(s) for s in bcaps]), len(bcaps)) print(bcaps) raise e loss = F.cross_entropy(output.transpose(2, 1), target, reduction='mean') tbw.add_scalar('podcasts/train-loss', float(loss.item()) * LOG2E, seen) opt.zero_grad() loss.backward() # clip gradients # - If the total gradient vector has a length > 1, we clip it back down to 1. if arg.gradient_clipping > 0.0: nn.utils.clip_grad_norm_(model.parameters(), arg.gradient_clipping) opt.step() # sch.step() fr = to pbar.update(b) pbar.close()
]) lensgh = np.array([sym.diff(lensfunc, u_x, u_x, u_x), \ sym.diff(lensfunc, u_x, u_x, u_y), \ sym.diff(lensfunc, u_x, u_y, u_y), \ sym.diff(lensfunc, u_y, u_y, u_y)]) # Use Sympy to turn the lens equations into Numpy functions using Sympy #lensfun = sym.lambdify([u_x, u_y, theta, phi, N, sigma], lensfunc, 'numpy') #lensg = sym.lambdify([u_x, u_y, theta, phi, N, sigma], lensg, 'numpy') #lensh = sym.lambdify([u_x, u_y, theta, phi, N, sigma], lensh, 'numpy') #lensgh = sym.lambdify([u_x, u_y, theta, phi, N, sigma], lensgh, 'numpy') #Gaussian screen functions & derivatives scrfun = lambda u_x, u_y, theta, phi, N, sigma : \ np.sqrt(2)*sigma*np.sqrt(1/N)*bt.sum(np.cos(u_x*np.sin(theta[j]) + \ u_y*np.cos(theta[j]) + phi[j]) for j in range(1, N-1)) scrgx = lambda u_x, u_y, theta, phi, N, sigma : \ np.sqrt(2)*sigma*np.sqrt(1/N)*bt.sum(-np.sin(u_x*np.sin(theta[j]) + \ u_y*np.cos(theta[j]) + phi[j])*np.sin(theta[j]) for j in range(1, N-1)) scrgy = lambda u_x, u_y, theta, phi, N, sigma : \ np.sqrt(2)*sigma*np.sqrt(1/N)*bt.sum(-np.sin(u_x*np.sin(theta[j]) + \ u_y*np.cos(theta[j]) + phi[j])*np.cos(theta[j]) for j in range(1, N-1)) scrgxx = lambda u_x, u_y, theta, phi, N, sigma : \ -np.sqrt(2)*sigma*np.sqrt(1/N)*bt.sum(np.sin(theta[j])**2*np.cos(u_x*np.sin(theta[j]) + \ u_y*np.cos(theta[j]) + phi[j]) for j in range(1, N-1)) scrgyy = lambda u_x, u_y, theta, phi, N, sigma : \ -np.sqrt(2)*sigma*np.sqrt(1/N)*bt.sum(np.cos(u_x*np.sin(theta[j]) + u_y*np.cos(theta[j]) + \ phi[j])*np.cos(theta[j])**2 for j in range(1, N-1)) scrgxy = lambda u_x, u_y, theta, phi, N, sigma : \ -np.sqrt(2)*sigma*np.sqrt(1/N)*bt.sum(np.sin(theta[j])*np.cos(u_x*np.sin(theta[j]) + \ u_y*np.cos(theta[j]) + phi[j])*np.cos(theta[j]) for j in range(1, N-1))
bigram_freq = nltk.FreqDist(nltk.bigrams(t))
ave_pmi = []
for doc in text:
    pmi = 0
    # flatten each doc
    flatten_doc = [word for sent in doc for word in sent]
    # get the bigram dict of the flattened doc
    bigram_dict = nltk.FreqDist(nltk.bigrams(flatten_doc))
    for i in bigram_dict.keys():
        prob_word1 = unigram_freq[i[0]] / float(sum(unigram_freq.values()))
        prob_word2 = unigram_freq[i[1]] / float(sum(unigram_freq.values()))
        prob_word1_word2 = bigram_freq[(i[0], i[1])] / float(sum(bigram_freq.values()))
        bigram_pmi = math.log(prob_word1_word2 / float(prob_word1 * prob_word2), 2)
        pmi += bigram_pmi
    ave_pmi.append(pmi / len(bigram_dict))
# append this result to the dataframe as RANK
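# A worked example of the PMI formula used above, with invented counts: if
# "new" accounts for 20 of 1000 unigram tokens, "york" for 10 of 1000, and the
# bigram ("new", "york") for 8 of 999 bigrams, the pair co-occurs about 40
# times more often than independence would predict.
import math

p_w1 = 20 / 1000
p_w2 = 10 / 1000
p_w1_w2 = 8 / 999
print(math.log(p_w1_w2 / (p_w1 * p_w2), 2))   # ~5.32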
def sum(iterable, start=0): return builtins.sum(iterable, start)
def shape(self):
    if hasattr(self, '_shape') and self._shape is not None:
        return self._shape
    if hasattr(self, '_nsplits') and self._nsplits is not None:
        self._shape = tuple(builtins.sum(nsplit) for nsplit in self._nsplits)
        return self._shape
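# Illustration of the nsplits convention the helpers above rely on (plain
# Python, not actual Mars API calls): each axis's extent is the sum of its
# chunk sizes, so nsplits of ((2, 2, 1), (3, 3)) describe a 5 x 6 tensor.
import builtins

nsplits = ((2, 2, 1), (3, 3))
print(tuple(builtins.sum(nsplit) for nsplit in nsplits))   # (5, 6)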
Read the data files and prepare the data for calculations and graphs
"""
fd = read_data('flightdelays-2010-2020.csv')
# fd.head()
fd.keys()
count = 0

### The code below determines whether we need to do additional data wrangling.

### Total number of records
print(len(fd))

### Total number of keys
print(fd.keys())

import builtins

### Total number of non-null values per key
for key in fd.keys():
    peds = fd[key]
    count = 0
    if type(peds[0]) == type("str"):
        count = builtins.sum(1 for e in peds if e != "")
    else:
        count = builtins.sum(1 for e in peds if e >= 0)
    print(f'{key:20} : {count:5}')
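# For comparison (an assumption -- the snippet never shows what read_data
# returns): if the file were loaded as a pandas DataFrame, a similar non-null
# survey could be done directly with pandas.
import pandas as pd

fd_df = pd.read_csv('flightdelays-2010-2020.csv')
print(len(fd_df))      # total number of records
print(fd_df.count())   # non-null values per column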