def run_exp(log_file, threads, rates, sleep_time, is_security=False, dropping=False, workload='ycsb'): start_parity() time.sleep(20) add_peers() time.sleep(30) start_clients(threads, rates, log_file, workload) if (not is_security) and (not dropping): time.sleep(sleep_time) else: if is_security: # for security part time.sleep(100) partition(NODES, TIMEOUT) time.sleep(sleep_time - 100 - TIMEOUT) else: # is dropping time.sleep(250) drop(NODES, 4) time.sleep(sleep_time - 250) kill() time.sleep(5)
def run_exp(log_file, threads, rates, sleep_time, is_security=False, dropping=False, workload='ycsb'): start_parity() print "sleep 20 for start_parity" time.sleep(20) add_peers() print "sleep 30 for add_peers" time.sleep(30) start_clients(threads, rates, log_file, workload) if (not is_security) and (not dropping): print "not security and not dropping" print "sleep " + str(sleep_time) + " for start_clients" time.sleep(sleep_time) else: if is_security: print "security part" # for security part time.sleep(100) partition(NODES, TIMEOUT) time.sleep(sleep_time-100-TIMEOUT) else: # is dropping print "dropping" time.sleep(250) drop(NODES, 4) time.sleep(sleep_time-250) kill() time.sleep(5)
def test_part1(): data = [-1, -2, -3] j = partition.partition(data, 0) assert j == 3 data = [1, 2, 3] j = partition.partition(data, 0) assert j == 0
def test_part2(): data = list(range(-5, 5)) j = partition.partition(data, 0) print(data) assert j == 5 data = list(range(5, -5, -1)) j = partition.partition(data, 0) print(data) assert j == 4
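# Hedged sketch (not necessarily the project's partition module): one implementation
# consistent with the two tests above -- rearrange `data` in place so that every element
# smaller than `pivot` comes first, and return the count of elements smaller than `pivot`.
def partition(data, pivot):
    j = 0
    for i in range(len(data)):
        if data[i] < pivot:
            data[i], data[j] = data[j], data[i]
            j += 1
    return j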
def test_partition(): def is_part(A, x): if all([y<x for y in A]): return True if all([y>x for y in A]): return True f = A.index(x) l = f while l<len(A) and A[l] == x: l += 1 return all([y < x for y in A[:f]] + [y == x for y in A[f:l]] + [y > x for y in A[l:]]) a = [3,45,2345,34,45,64,56,45,34,52,345,34,534] partition(a,1) assert is_part(a,45)
def test_partition(self): TEST_DATA = [ ([], 0), ([1], 1), ([1, 2, 3, 4, 5], 3), ([3, 2, 5, 1, 2, 4, 2], 3), ([3, 5, 8, 5, 10, 2, 1], 5)] for test in TEST_DATA: list = LinkedList(test[0]) print "Original list: %s" % list partition(list, test[1]) print "Partitioned list with pivot %d: %s" % (test[1], list) self.assertTrue(check_partition(list, test[1]))
def write_processed_tweets(raw_tweet_file, keyword_file, output_file): partitioned_tweets = partition.partition(raw_tweet_file, keyword_file) sentiment_tweets = {} for candidate, tweets in partitioned_tweets.items(): sentiment_tweets[candidate] = sentiment.run_sentiment_analysis( tweets, 'words') with open(output_file, 'w') as data_file: data_file.write('{\n') first_candidate = True for candidate, tweets in sentiment_tweets.items(): if first_candidate: first_candidate = False else: data_file.write(',\n') data_file.write('"' + candidate + '": [\n') first_item = True for tweet in tweets: if first_item: first_item = False else: data_file.write(',\n') data_file.write(json.dumps(tweet)) data_file.write(']\n') data_file.write('}')
def quick_sort_r(a, lb, ub): if lb < ub: partition_point = partition(a, lb, ub) quick_sort_r(a, lb, partition_point-1) quick_sort_r(a, partition_point+1, ub) return a
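# A minimal sketch of the array partition helper these quicksort drivers assume, using
# the Lomuto scheme (last element as pivot). It also reproduces the expectation of the
# unit test near the end of this section: partition([2, 8, 7, 1, 3, 5, 6, 4], 0, 7)
# rearranges the list to [2, 1, 3, 4, 7, 5, 6, 8] and returns index 3.
def partition(a, lb, ub):
    pivot = a[ub]
    i = lb - 1
    for j in range(lb, ub):
        if a[j] <= pivot:
            i += 1
            a[i], a[j] = a[j], a[i]
    a[i + 1], a[ub] = a[ub], a[i + 1]
    return i + 1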
def build_tree(self, rows): """Recursively builds decision tree""" # Try partitioning the dataset on each of the unique attributes, # calculate the information gain, # and return the question that produces the highest gain. gain, question = find_optimal_split(rows) # Base case: no further info gain # Since we can ask no further questions, # we'll return a leaf. if gain == 0: return Leaf(rows) # If we reach here, we have found a useful feature / value # to partition on. true_rows, false_rows = partition(rows, question) # Recursively build the true branch. true_branch = self.build_tree(true_rows) # Recursively build the false branch. false_branch = self.build_tree(false_rows) # Return a Question node. # This records the best feature / value to ask at this point, # as well as the branches to follow # depending on the answer. return DecisionNode(question, true_branch, false_branch)
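# A minimal sketch of the row partition used by build_tree (and by choose_split below),
# assuming a Question object exposes a match(row) predicate (an assumption about the API,
# in the style of the common decision-tree tutorial this code resembles).
def partition(rows, question):
    true_rows, false_rows = [], []
    for row in rows:
        if question.match(row):  # assumed Question API
            true_rows.append(row)
        else:
            false_rows.append(row)
    return true_rows, false_rows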
def test_partitions_when_partitions_dont_fit_perfectly(self): partition_size = 10 rows = 95 partitions = partition(rows, partition_size) self.assertEqual(10, len(partitions)) expected_partitions = [{'start': 0, 'end': 9}, {'start': 10, 'end': 19}, {'start': 20, 'end': 29}, {'start': 30, 'end': 39}, {'start': 40, 'end': 49}, {'start': 50, 'end': 59}, {'start': 60, 'end': 69}, {'start': 70, 'end': 79}, {'start': 80, 'end': 89}, {'start': 90, 'end': 94}] self.assertEqual(expected_partitions, partitions)
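# Hedged sketch of the row-range partitioner the test above exercises: split `rows` rows
# into consecutive chunks of at most `partition_size`, returning inclusive start/end
# indexes (the dict shape is taken from expected_partitions).
def partition(rows, partition_size):
    return [{'start': start, 'end': min(start + partition_size, rows) - 1}
            for start in range(0, rows, partition_size)]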
def quick2(a): ''' Non-recursive quicksort using two index stacks (debug variant with trace printing). :param a: array of data ''' total_elements = len(a) STACK_SIZE = total_elements low = 0 high = total_elements - 1 pivot_point = 0 stack_ptr = -1 low_stack = [0] * STACK_SIZE high_stack = [0] * STACK_SIZE pass_count = 1 while True: # is non empty stack? set indexes if (stack_ptr > -1): low = low_stack[stack_ptr] high = high_stack[stack_ptr] stack_ptr = stack_ptr - 1 while low < high: print('a, before partition: {0}'.format(a)) print('low : {0}'.format(low)) print('high: {0}'.format(high)) print('pivot_point: {0}'.format(pivot_point)) pivot_point = partition(a, low, high) if pivot_point - low < high - pivot_point: if stack_ptr >= STACK_SIZE: print('Stack overflow. Cannot complete sort') return stack_ptr = stack_ptr + 1 low_stack[stack_ptr] = pivot_point + 1 high_stack[stack_ptr] = high high = pivot_point - 1 else: if stack_ptr >= STACK_SIZE: print('Stack overflow. Cannot complete sort') return stack_ptr = stack_ptr + 1 low_stack[stack_ptr] = low high_stack[stack_ptr] = pivot_point - 1 low = pivot_point + 1 pass_count = pass_count + 1 print('pass #: {0}'.format(pass_count)) print('low_stack : {0}'.format(low_stack)) print('high_stack: {0}'.format(high_stack)) print('a: {0}'.format(a)) # is empty stack? break if stack_ptr <= -1: break
def testConvertPartitionToPacking(): from packing import packing from partition import partition instances = [ '5', '5 6', '5 6 7', '5 6 7 8', '', '5 7', '6 6', '6 6 7 7', '10 20 30 40 11 21 31 41', ] for instance in instances: convertedInstance = convertPartitionToPacking(instance) instanceSolution = partition(instance) convertedInstanceSolution = packing(convertedInstance) revertedSolution = convertedInstanceSolution utils.tprint(instance, 'maps to', convertedInstance,\ ' solutions were: ', instanceSolution, ';', convertedInstanceSolution) if revertedSolution == 'no': assert instanceSolution == 'no' else: total = sum([int(x) for x in instance.split()]) solutionTotal = sum([int(x) for x in revertedSolution.split()]) assert solutionTotal * 2 == total
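# Hedged sketch of a brute-force solver with the contract the test above relies on:
# `partition` takes a whitespace-separated string of integers and returns either 'no'
# or a whitespace-separated subset whose sum is exactly half of the total (exponential
# time; for illustration only).
from itertools import combinations

def partition(instance):
    nums = [int(x) for x in instance.split()]
    total = sum(nums)
    if total % 2 != 0:
        return 'no'
    for size in range(len(nums) + 1):
        for subset in combinations(nums, size):
            if 2 * sum(subset) == total:
                return ' '.join(str(x) for x in subset)
    return 'no'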
def test_input_2(self): # Failure message: # expected partition(["hi", None, 6, "bye"], isString) # to equal [ [ "hi", "bye" ], [ None, 6 ] ] self.assertEqual(partition(["hi", None, 6, "bye"], self.is_string), [ ["hi", "bye"], [None, 6]])
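# Hedged sketch matching the expectation asserted above: partition(seq, pred) returns a
# pair of lists [items where pred is truthy, items where it is not], preserving order.
def partition(seq, pred):
    matched, unmatched = [], []
    for item in seq:
        (matched if pred(item) else unmatched).append(item)
    return [matched, unmatched]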
def createPartition(self,size=""): num_list = range(1,5) for i in self.partitions: num_list.remove(self.partitions[i].number) os.system("fdisk %s << EOF\nn\np\n%s\n\n%s\nw" % (self.path, num_list[0],size)) self.partitions["%s%s" % (self.path,num_list[0])] = partition(self, "%s%s" % (self.path,num_list[0])) return self.partitions["%s%s" % (self.path,num_list[0])]
def test_partition(): s = 6 bound = (5, 4) expect = [(2, 4), (3, 3), (4, 2), (5, 1)] actual = partition(s, bound) for tuple_a, tuple_b in zip(actual, expect): assert array_equal(tuple_a, tuple_b)
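# Hedged sketch of the semantics implied by the test above (an assumption, not the
# original implementation): enumerate pairs (a, b) with a + b == s, 1 <= a <= bound[0]
# and 1 <= b <= bound[1], in increasing order of a.
def partition(s, bound):
    lo = max(1, s - bound[1])
    hi = min(bound[0], s - 1)
    return [(a, s - a) for a in range(lo, hi + 1)]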
def incomparable(comparisons, record_list): non_comparables = record_list comparables = [] for comp in comparisons: dominants, non_dominants, non_comparables = \ partition(non_comparables, comp) comparables += dominants + non_dominants return comparables, non_comparables
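# Hedged sketch of the three-way split used here and in partition_mbest/partition_mtopk
# below, assuming a hypothetical Comparison object with is_dominant(record) and
# is_dominated(record) predicates (the real preference-theory API may differ); records
# matching neither side are "non-comparable" with respect to this comparison.
def partition(record_list, comp):
    dominants, non_dominants, non_comparables = [], [], []
    for record in record_list:
        if comp.is_dominant(record):     # hypothetical predicate
            dominants.append(record)
        elif comp.is_dominated(record):  # hypothetical predicate
            non_dominants.append(record)
        else:
            non_comparables.append(record)
    return dominants, non_dominants, non_comparables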
def Inddist(part, nsample, pvariates, iterations): T = [] for i in range(iterations): W1 = wishart.rvs(df=nsample - 1, scale=np.eye(pvariates)) W2 = wishart.rvs(df=nsample - 1, scale=W1 / (nsample - 1)) W2_11, W2_12, W2_21, W2_22 = partition(W2, part, part) T = np.append(T, det(W2) / (det(W2_11) * det(W2_22))) return T
def ocv_grouping(bboxes, min_neighbours): """ filtering/grouping of overlapping detections (bounding boxes) as done in opencv::detectMultiScale. can be used instead of non-maximum suppression. test implementation, slow performance Args: bboxes: list of tuples of bounding boxes and the corresponding confidence score [(rect, score)] min_neighbours: minimum neighbours threshold Returns: list of bounding boxes [Rect] """ result = [] avg_bboxes = [] class_count = [] # comparator function def compare(a, b): return Rect.compare(a[0], b[0]) # cluster candidate bboxes into n classes, each class represents equivalent rectangles labels, num_classes = partition(bboxes, compare) # init vars for i in xrange(num_classes): avg_bboxes.append(Rect(0,0,0,0)) class_count.append(0) # calc average bounding box of each class for i in xrange(len(bboxes)): j = labels[i] avg_bboxes[j] += bboxes[i][0] class_count[j] += 1 # select valid bboxes for i in xrange(num_classes): # reject classes with count < min_neighbours if class_count[i] < min_neighbours: continue # calc average avg_bboxes[i] /= class_count[i] # reject average bounding boxes which are inside other candidates reject = False for j in range(num_classes): if class_count[j] < min_neighbours: continue if i != j and avg_bboxes[j].contains(avg_bboxes[i]): reject = True break # add to results if not rejected if not reject: result.append(avg_bboxes[i].center()) return result
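# Hedged sketch of the OpenCV-style clustering helper assumed above: group items into
# equivalence classes under a symmetric predicate using union-find, returning
# (labels, num_classes) where labels[i] is the class index of items[i]. This illustrates
# the contract, not the project's actual implementation.
def partition(items, equiv):
    n = len(items)
    parent = list(range(n))

    def find(i):
        while parent[i] != i:
            parent[i] = parent[parent[i]]  # path halving
            i = parent[i]
        return i

    # union every pair the predicate declares equivalent (O(n^2) comparisons)
    for i in range(n):
        for j in range(i + 1, n):
            if equiv(items[i], items[j]):
                ri, rj = find(i), find(j)
                if ri != rj:
                    parent[ri] = rj
    # compact root ids into consecutive class labels
    roots, labels = {}, []
    for i in range(n):
        labels.append(roots.setdefault(find(i), len(roots)))
    return labels, len(roots)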
def quicksort ( n, p, r ): gb.time += 1 if ( p < r ): q = partition ( n, p, r ) gb.time += 1 quicksort ( n, p, q - 1 ) gb.time += 1 quicksort ( n, q + 1, r ) gb.time += 1
def qselect(start, end, k, data): while start < end: pivot = partition(start, end, data) if k < pivot: end = pivot - 1 elif k > pivot: start = pivot + 1 else: return data[k] return data[k]
def Canodist(part, nsample, pvariates, iterations): T = [] for i in range(iterations): W1 = wishart.rvs(df=nsample - 1, scale=np.eye(pvariates)) W2 = wishart.rvs(df=nsample - 1, scale=W1 / (nsample - 1)) W2_11, W2_12, W2_21, W2_22 = partition(W2, part, part) Q = W2_12.dot(inv(W2_22).dot(W2_21)) T = np.append(T, det(Q) / det(W2_11 - Q)) return T
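# Hedged sketch of the matrix block split used by Inddist and Canodist above, assuming
# `part` is the row/column index at which the square scale matrix is cut into a 2x2
# block structure (an assumption about the helper's contract).
import numpy as np

def partition(M, r, c):
    M = np.asarray(M)
    return M[:r, :c], M[:r, c:], M[r:, :c], M[r:, c:]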
def quicksort(my_list, start=0, end=None): if end is None: end = len(my_list) - 1 if start >= end: return pivot_index = partition(my_list, start, end) quicksort(my_list, start, pivot_index - 1) quicksort(my_list, pivot_index + 1, end)
def main(args): order_file = args.pop(0) order = list(parse_order(_slurp(order_file))) data = [] for path in args: data.extend(_slurp(path)) for part in partition(order, data): print(part)
def quicksort(arr, low, high): if low < high: # pi is partitioning index, arr[p] is now # at right place pi = partition(arr, low, high) # Separately sort elements before # partition and after partition quicksort(arr, low, pi - 1) quicksort(arr, pi + 1, high)
def quickSort(arr, begin, end): ''' partition(arr, 0, len-1); then recursively sort the parts to the left and right of pivot_index, until each sub-array has length <= 1 ''' if begin < end: pivot_index = partition(arr, begin, end) # partition works on the original array in place (no re-concatenation), so the begin/end indexes must be passed through quickSort(arr, begin, pivot_index - 1) quickSort(arr, pivot_index + 1, end) return arr
def quick2(a): ''' This method is an implementation of non-recursive quicksort using two stacks. :param a: array of data :return : sorted array a ''' total_elements = len(a) STACK_SIZE = total_elements low = 0 high = total_elements - 1 pivot_point = 0 stack_ptr = -1 low_stack = [0] * STACK_SIZE high_stack = [0] * STACK_SIZE pass_count = 1 while True: # is non empty stack? set indexes if (stack_ptr > -1): low = low_stack[stack_ptr] high = high_stack[stack_ptr] stack_ptr = stack_ptr - 1 while low < high: pivot_point = partition(a, low, high) if pivot_point - low < high - pivot_point: if stack_ptr >= STACK_SIZE: print('Stack overflow. Cannot complete sort') return stack_ptr = stack_ptr + 1 low_stack[stack_ptr] = pivot_point + 1 high_stack[stack_ptr] = high high = pivot_point - 1 else: if stack_ptr >= STACK_SIZE: print('Stack overflow. Cannot complete sort') return stack_ptr = stack_ptr + 1 low_stack[stack_ptr] = low high_stack[stack_ptr] = pivot_point - 1 low = pivot_point + 1 pass_count = pass_count + 1 # is empty stack? break if stack_ptr <= -1: break
def recognition_init(base_path): train_dir = "%s/%s"%(base_path, TRAIN_DIR) # assume crop_images/ exists # initial faces are not partitioned if (not os.path.exists(train_dir) or (len(os.listdir(train_dir)) == 0)): print("Partition initial database into training and test set") partition(base_path) reset_labels(base_path) (train_face_data, train_face_labels) = get_initial_train_data(base_path) (mean_face, num_eigen, eigen_vals, eigen_vecs, weights) = train(train_face_data) # verify model health test(base_path, train_face_labels, mean_face, num_eigen, eigen_vals, eigen_vecs, weights) # read the existing label file label_file = open("%s/labels"%(base_path), mode="r") names = [] for line in label_file.readlines(): names.append(line.strip()) return (train_face_data, train_face_labels, names, mean_face, num_eigen, eigen_vals, eigen_vecs, weights)
def test_partition(self): new_list = partition(self.default_list, 5) current = new_list.head self.assertTrue(current.get_data() < 5) current = current.get_next() self.assertTrue(current.get_data() < 5) current = current.get_next() self.assertTrue(current.get_data() < 5) current = current.get_next() self.assertTrue(current.get_data() >= 5) current = current.get_next() self.assertTrue(current.get_data() >= 5) current = current.get_next() self.assertTrue(current.get_data() >= 5)
def partition_mbest(theory, record_list): ''' Get best records by partitioning the record list based on each comparison and separating the dominant records and discarding the dominated ones ''' dominants = list(record_list) dominants, _ = incomparable(theory.get_comparison_list(), dominants) # for each comparison, verify dominant records for comp in theory.get_comparison_list(): dominants, _, non_comparables = partition(dominants, comp) dominants = dominants + non_comparables return dominants
def test_partition1(): ''' Example: 3-> 5 -> 8 -> 5 -> 10 -> 2 -> 1 [partition = 5] Output: 3 -> 1 -> 2 -> 10 -> 5 -> 5 -> 8 ''' list = ll.Node(3) list.next = ll.Node(5) list.next.next = ll.Node(8) list.next.next.next = ll.Node(5) list.next.next.next.next = ll.Node(10) list.next.next.next.next.next = ll.Node(2) list.next.next.next.next.next.next = ll.Node(1) newList = partition.partition(list, 5) assert newList.next.val == 3
def choose_split(data, threshold): """Find the best question to ask by iterating over every feature / value and calculating the information gain.""" n_features = len(data[0]) - 1 # number of columns quest_gain = [] # keep track of the gains and questions for col in range(1, n_features): # for each feature values = set([row[col] for row in data]) # unique values in the column for val in values: # for each value question = Question(col, val) # try splitting the dataset true_rows, false_rows = partition(data, question) # Skip this split if it doesn't divide the dataset. if len(true_rows) == 0 or len(false_rows) == 0: continue # Calculate the information gain from this split gain = info_gain(data, true_rows, false_rows) quest_gain.append(Question_gain(gain, question)) possible_question = [] # possible questions to ask n_quest_gain = len(quest_gain) if n_quest_gain == 0: return float('Inf'), float('NaN') for x in range(n_quest_gain): if (quest_gain[x].gain >= threshold): possible_question.append( Question_gain(quest_gain[x].gain, quest_gain[x].question)) n_possible_question = len(possible_question) if n_possible_question == 0: return float('Inf'), float('NaN') if n_possible_question >= 2: [i, j] = random.sample(range(0, n_possible_question), 2) else: i = j = random.randint(0, n_possible_question - 1) if possible_question[i].gain >= possible_question[j].gain: return possible_question[i].gain, possible_question[i].question else: return possible_question[j].gain, possible_question[j].question
def calc_dG_gap(sequence_structure_params_tuple): sequence_structure_pair = sequence_structure_params_tuple[:-1] params = sequence_structure_params_tuple[-1] sequence, structure = sequence_structure_pair[0:2] dG_structure = score_structure(sequence, structure, params=params) force_base_pairs = None if len(sequence_structure_pair) > 2: force_base_pairs = sequence_structure_pair[2] p = partition(sequence, params=params, suppress_all_output=True, mfe=True, force_base_pairs=force_base_pairs) dG = p.dG dG_gap = dG_structure - dG # will be a positive number, best case zero. print p.struct_MFE, dG_gap return dG_gap
def partition_mtopk(theory, record_list, k): ''' Separate the top-k most dominant tuples The algorithm repeatedly scans the set of dominated tuples progressively populating the return list ''' # initially assumes all dominant return_list = [] dominant_recs = list(record_list) dominant_recs, _ = \ incomparable(theory.get_comparison_list(), dominant_recs) while len(return_list) < k and dominant_recs: temporary_list = [] # for each comparison, verify dominant records for comp in theory.get_comparison_list(): dominant_recs, non_dominant_recs, non_comparable = \ partition(dominant_recs, comp) temporary_list = temporary_list + non_dominant_recs dominant_recs = dominant_recs + non_comparable return_list = return_list + dominant_recs dominant_recs = temporary_list return return_list[0:k]
def spike(A, b, config, output = True, debug = False) : # Check the values if (config['partitionNumber'] < 2) : raise ValueError("The partitionNumber has to be at least 2 but it is", config['partitionNumber']) elif (config['matrixSize'] / config['partitionNumber'] < config['bandwidth']) : raise ValueError("The number of partitions must be smaller or equal to", config['matrixSize'] / config['bandwidth'], "but it is", config['partitionNumber']) elif (config['matrixSize'] % config['partitionNumber'] != 0) : raise ValueError("The matrixSize should be devideable by the number of partitions. given:", config['matrixSize'], config['partitionNumber']) else : # Determine the size of each partition config['partitionSize'] = config['matrixSize'] / config['partitionNumber'] # make bandwidth even if (config['bandwidth'] % 2 != 0) : config['bandwidth'] += 1 config['rhsSize'] = b.shape[1] config['offdiagonalSize'] = config['bandwidth']/2 #(bandwidth-1)/2 if (output) : print config print "input A:" print A.todense() print "input b:" print b.todense() # create prerequirements for OpenCL ctx = cl.create_some_context(True) #for platform in cl.get_platforms() : # devices = platform.get_devices(cl.device_type.CPU) # ctx = cl.Context(devices) queue = cl.CommandQueue(ctx) # compile programm code for CL directory = os.path.dirname(os.path.realpath(__file__)) gaussFile = open(os.path.join(directory, "gauss.cl"), 'r') gaussCode = ''.join(gaussFile.readlines()) program = cl.Program(ctx, gaussCode).build() # 1. Pre-processing # 1.1 Partitioning of the original system onto different processors start = time() buffers = partition.partition(config, ctx, A, b, debug) # 1.2 Factorization of each diagonal block # solve A_j[V_j, W_j, G_j] = [(0 ... 0 B_j)T, (C_j 0 ... 0)T, F_j] # this step also involves solving of A_j G_j = F_j from (2.1) factor.factor(config, ctx, queue, program, buffers) # At this point we can free the A buffer (buffers[0]) # TODO make the memory release dependent on some event #buffers[0].release() # 2. Post-processing # 2.1 Solving the reduced system buffers = solve.reduced(config, ctx, queue, program, buffers, debug) # At this point we can free the SG buffer (buffers[2]) #buffers[2].release() # 2.2 Retrieving the overall solution x = solve.final(config, ctx, queue, program, buffers, debug) stop = time() rt = stop-start return [x, rt]
def main(): """ definition of main scripting file for debugging purposes. """ # time the execution starttime = time() # in the serial version, lets just set rank=0 rank = 0 #---------------SET PARAMETERS ----------- if rank == 0: print "Setting up umbrella sampling parameters." params = {} # set the scratch directory for the calculation files params['scratchdir'] = "/Users/jtempkin/enhanced_sampling_toolkit/neus/debug_US" params['inputFilename'] = "/Users/jtempkin/enhanced_sampling_toolkit/neus/input.diala" params['logFilename'] = params['scratchdir'] + "/log" # here we will set the umbrella sampling parameters in the params dict params['ncells'] = 144 params['cellWidth'] = 15 params['nwalkers'] = 1 params['walkerSteps'] = 50000 params['stepLength'] = 10 params['Ftype'] = 'transition' # lets set the dynamics parameters that are needed to specify the walker params['temperature'] = 310.0 params['timestep'] = 1.0 #--------------- INITIALIZATION-------- # only allow root rank to build files if rank == 0: print "Building scratch directory." # construct the wkdir. This is where the intermediate dynamcs files will # be written. fileIO.makeWkdir(params['scratchdir']) # construct the umbrella data structure if rank == 0: print "Initializing the umbrella structure." # create the partition object system = partition.partition(params['ncells']) # now we construct the umbrella windows if rank == 0: print "Building the umbrellas." # specify the list of boundaries, nboxes, 1/2 width of the windows umbParams = {} # right now, we will hardcode a 12x12 array using Erik's routine for gridding a space umbParams['cvrange'] = np.array([map(float,entry.split(",")) for entry in "-180,180,12,15;-180,180,12,15".split(";")]) umbParams['wrapping'] = np.array(map(float, "1,1".split(','))) system.umbrellas = fileIO.createUmbrellas(umbParams) # build neighbor list #system.buildNeighborList() #----------------INITIALIZE THE ENTRY POINTS FROM FILE------------ # now we provide a data structure for entrypoints for each umbrella: for i in range(len(system.umbrellas)): system.umbrellas[i].entryPoints = [] # now load each entry point file from the data base and load as an array of # ctypes pointers for i in range(len(system.umbrellas)): # load the numpy array data = np.load("entryPoints/in_" + str(i) + "_w0.entryPoints.npy") # now add each as a ctypes points to the entry points library for j in range(data.shape[0]): system.umbrellas[i].entryPoints.append(data[j].ctypes.data_as(ctypes.POINTER(ctypes.c_double))) """ #------------ GENERATE INITIAL ENTRY POINTS -------------- # sample umbrellas and construct F if rank == 0: print "Seeding entry points from a conventional simulation." # this for i in range(len(system.umbrellas)): print i print "starting walker" # lets instantiate a walker object to sample this window. 
wlkr = lammpsWalker.lammpsWalker(params['inputFilename']) print "minimize" # minimize structure prior to dynamics wlkr.minimize() # set the dynamics to sample by langevin wlkr.command("fix 1 all nve") wlkr.command("fix 2 all langevin 310.0 310.0 30.0 20874") print "setting colvars" # set colvars for this walker (currently, alanine dipeptide dihedrals) wlkr.colvars.append(['dihedral', 5, 7, 9, 15]) wlkr.colvars.append(['dihedral', 7, 9, 15, 17]) # set an array of starting/stoping restraints for equilibration restraint = [[0.0, 100.0], [0.0, 100.0]] # now specify colvars to dynamics routines wlkr.setColvars() # equilibrate the walker to the target point in CV space wlkr.equilibrate(system.umbrellas[i].center, restraint, 100000) # enter the sampling routine. This sampling routine will simply generate the initial # entry point distribution try: system.sample(wlkr, params['walkerSteps'], i, 0, params, rank) except errors.DynamicsError: print "Rank", rank, "sampling error occured in umbrella", i, "." continue # now we are done populating the samples array, close the walker wlkr.close() # now we write out the entrypoints for each umbrella: for i in range(len(system.umbrellas)): np.save(params['scratchdir'] + "/in_" + str(i) + "_w0.entryPoints", system.umbrellas[i].entryPoints) """ #----------------MAIN LOOP---------------- if rank == 0: print "Sampling via NEUS." # this for i in range(len(system.umbrellas)): print "Rank", rank, "sampling umbrella", i, "." # lets instantiate a walker object to sample this window. wlkr = lammpsWalker.lammpsWalker(params['inputFilename'], params['logFilename'], index=i) wlkr.setTimestep(params['timestep']) # set the dynamics to sample by langevin wlkr.command("fix 1 all nve") # the langevin fix here sets the temperature to 310K and the friction # coefficient to 30 ps-1 wlkr.command("fix 2 all langevin " + " ".join([str(params['temperature']), str(params['temperature'])]) + " 30.0 20874") # set colvars for this walker (currently, alanine dipeptide dihedrals) wlkr.colvars.append(['dihedral', 5, 7, 9, 15]) wlkr.colvars.append(['dihedral', 7, 9, 15, 17]) # now specify colvars to dynamics routines wlkr.setColvars() # now we initialize the starting coordinates from the entry points library temp_indx = random.randint(0, len(system.umbrellas[i].entryPoints)-1) print system.umbrellas[i].entryPoints[temp_indx], temp_indx print wlkr.lmp wlkr.setConfig(system.umbrellas[i].entryPoints[temp_indx]) wlkr.command("run 0 post no") print "drawing velocities" # draw the velocities uniformly for now wlkr.drawVel(distType = 'gaussian', temperature = params['temperature']) # enter the sampling routine. This sampling routine will simply generate the initial # entry point distribution try: system.sampleNeus(wlkr, params['walkerSteps'], i, 0, params, rank) except errors.DynamicsError: print "Rank", rank, "sampling error occurred in umbrella", i, "." continue # now we are done populating the samples array, close the walker wlkr.close() del wlkr #gc.collect() #----------------WRITE OUT DATA------------- # now allow rank 0 to process data. if rank == 0: print system.F fileIO.writeMat(system.F, params['scratchdir'] + "/F.out") """ print "Entering eigenvalue routine." # solve eigenvalue problem for F system.getZ() fileIO.writeMat(system.z, params['scratchdir'] + "/z.out") print "Computing the sensitivities." 
bounds = system.getlogBound(system.F) fileIO.writeMat(bounds, params['scratchdir'] + "/bounds.out") """ # now we will perform an analysis of the data and increase sampling of # windows with high variance if rank == 0: print "Done!" print "Total wallclock time was: " + str(time() - starttime) + " seconds." return 0
#!/usr/bin/env python from partition import partition, select_the_one_partition while True: w = input("Word: ") w = w.strip().replace('ь', '').replace('ъ', '') partitions = partition(w) the_partition = select_the_one_partition(partitions) print('and the one is:', the_partition)
def main(): """ definition of main scripting file for debugging purposes. """ # time the execution starttime = time() # in the serial version, lets just set rank=0 rank = 0 #---------------SET PARAMETERS ----------- if rank == 0: print "Setting up umbrella sampling parameters." params = {} # set the scratch directory for the calculation files params['scratchdir'] = "/Users/jtempkin/enhanced_sampling_toolkit/umbrella_sampling/debug_US" params['inputFilename'] = "/Users/jtempkin/enhanced_sampling_toolkit/umbrella_sampling/input.diala" # here we will set the umbrella sampling parameters in the params dict params['ncells'] = 144 params['cellWidth'] = 60.0 params['nwalkers'] = 1 params['walkerSteps'] = 10000 params['stepLength'] = 10 params['Ftype'] = 'transition' # lets set the dynamics parameters that are needed to specify the walker params['temperature'] = 310.0 #--------------- INITIALIZATION-------- # only allow root rank to build files if rank == 0: print "Building scratch directory." # construct the wkdir. This is where the intermediate dynamcs files will # be written. fileIO.makeWkdir(params['scratchdir']) # construct the umbrella data structure if rank == 0: print "Initializing the umbrella structure." # create the partition object system = partition.partition(params['ncells']) # now we construct the umbrella windows if rank == 0: print "Building the umbrellas." # specify the list of boundaries, nboxes, 1/2 width of the windows umbParams = {} # right now, we will hardcore a 12x12 array using Erik's routine for gridding a space umbParams['cvrange'] = np.array([map(float,entry.split(",")) for entry in "-180,180,12,30;-180,180,12,30".split(";")]) umbParams['wrapping'] = np.array(map(float, "1,1".split(','))) system.umbrellas = fileIO.createUmbrellas(umbParams) #------------ MAIN LOOP -------------- # sample umbrellas and construct F if rank == 0: print "Entering main loop." for i in range(len(system.umbrellas)): print "Rank", rank, "sampling umbrella", i, "." print "init walker" # lets instantiate a walker object to sample this window. wlkr = lammpsWalker.lammpsWalker(params['inputFilename']) print "minimize" # minimize structure prior to dynamics wlkr.minimize() # set the dynamics to sample by langevin wlkr.command("fix 1 all nve") wlkr.command("fix 2 all langevin 310.0 310.0 30.0 20874") # set colvars for this walker (currently, alanine dipeptide dihedrals) wlkr.colvars.append(['dihedral', 5, 7, 9, 15]) wlkr.colvars.append(['dihedral', 7, 9, 15, 17]) # set an array of starting/stoping restraints for equilibration restraint = [[0.0, 100.0], [0.0, 100.0]] print "setting colvars" # now specify colvars to dynamics routines wlkr.setColvars() print "equilibrating" # equilibrate the walker to the target point in CV space wlkr.equilibrate(system.umbrellas[i].center, restraint, 100000) # enter the sampling routine try: system.sample(wlkr, params['walkerSteps'], i, 0, params, rank) except errors.DynamicsError: print "Rank", rank, "sampling error occured in umbrella", i, "." continue # now we are done populating the samples array, close the walker wlkr.close() #----------------WRITE OUT DATA------------- # now allow rank 0 to process data. if rank == 0: print system.F fileIO.writeMat(system.F, params['wkdir'] + "/F.out") print "Entering eigenvalue routine." # solve eigenvalue problem for F system.getZ() fileIO.writeMat(system.z, params['wkdir'] + "/z.out") print "Computing the sensitivities." 
bounds = system.getlogBound(system.F) fileIO.writeMat(bounds, params['scratchdir'] + "/bounds.out") # now we will perform an analysis of the data and increase sampling of # windows with high variance if rank == 0: print "Done!" print "Total wallclock time was: " + str(time() - starttime) + " seconds." return 0
comm = MPI.COMM_WORLD rank = comm.Get_rank() host = commands.getoutput("hostname") # Split frequency vector into smaller chunks, pass each chunk to a process nProcs = 1.0*comm.Get_size() nFreqs = 1.0*np.shape(freqs)[0] nSteps = np.ceil(nFreqs/nProcs).astype(int) # Shuffle the frequency vector (adjacent frequencies take about as long to run) # np.random.shuffle(freqs) # chunks = [freqs[i:i+nSteps] for i in range(0, len(freqs), nSteps)] chunks = partition(freqs, nProcs) # print "Subprocess %s on %s:"%(rank, host) # create output directories, if none exist if rank==0: print "We have %d processes available"%(nProcs) if not os.path.exists(out_dir): os.mkdir(out_dir) if not os.path.exists(log_dir): os.mkdir(log_dir)
else: tasklist = None sc = None tasklist = comm.bcast(tasklist, root=0) sc = comm.bcast(sc, root=0) nTasks = 1.0*len(tasklist) nProcs = 1.0*comm.Get_size() nSteps = np.ceil(nTasks/nProcs).astype(int) chunks = partition(tasklist, nProcs) if (rank < len(chunks)): print "Process %d on host %s, doing %g jobs"%(rank, host, len(chunks[rank])) for job in chunks[rank]: inlat = job[0] outlat = job[1] outlon = job[2] figdir = os.path.join(root_dir, 'figures','in_%d/lon_%d'%(inlat, outlon)) # if not os.path.exists(os.path.join(root_dir, 'figures','in_%d'%inlat)): # os.mkdir(os.path.join(root_dir,'figures','in_%d'%inlat)) # if not os.path.exists(figdir): # os.mkdir(figdir)
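# Hedged sketch of the chunking helper both MPI fragments above rely on: split a
# sequence into n nearly equal contiguous chunks (the exact balancing rule of the
# original helper is an assumption; note the callers pass n as a float).
import numpy as np

def partition(seq, n):
    n = int(n)
    bounds = np.linspace(0, len(seq), n + 1).astype(int)
    return [seq[bounds[i]:bounds[i + 1]] for i in range(n)]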
def test_partition(self): a = [2, 8, 7, 1, 3, 5, 6, 4] partition(a, 0, 7) self.assertEqual(a, [2, 1, 3, 4, 7, 5, 6, 8])
def print_alignment_data(self, multiread_reports_and_ties, count=1): """ Prints almost-SAM alignments, introns/indels, and exonic coverage. Descriptions of output: Alignments (sam_intron_ties) output only for ties in alignment score (which are within some tie margin as decided by multiread_to_report); this is the first element of multiread_reports_and_ties tab-delimited output tuple columns: Standard SAM output except fields are in different order -- and the first four fields include sample/intron information. If an alignment overlaps k introns, k lines are output. The order of the fields is as follows. 1. The character 'N' so the line can be matched up with intron bed lines 2. Sample index 3. Number string representing RNAME; see BowtieIndexReference class in bowtie_index for conversion information 4. Intron start position 5. Intron end position 6. '+' or '-' indicating which strand is sense strand 7. '-' to ensure that the line follows all intron lines 8. POS 9. QNAME 10. FLAG 11. MAPQ 12. CIGAR 13. RNEXT 14. PNEXT 15. TLEN 16. SEQ 17. QUAL ... + optional fields (sam_clip_ties) output only for ties in alignment score when no introns are overlapped -- these alignments are almost invariably soft-clipped [SAME AS SAM FIELDS; see SAM format specification] (sam) output only for alignments to be reported (first element of tuple multiread_reports_and_ties) score +/- tie_margin); tab-delimited output tuple columns: Standard SAM output except fields are in different order to faciliate partitioning by sample/RNAME and coordinate sorting. The order of the fields is as follows. 1. Sample index if outputting BAMs by sample OR sample-rname index if outputting BAMs by chr 2. (Number string representing RNAME; see BowtieIndexReference class in bowtie_index for conversion information) OR '0' if outputting BAMs by chr 3. POS 4. QNAME 5. FLAG 6. MAPQ 7. CIGAR 8. RNEXT 9. PNEXT 10. TLEN 11. SEQ 12. QUAL ... + optional fields Exonic chunks (aka ECs; two formats, any or both of which may be emitted -- only if primary alignment is present among alignments to be reported): Exonic chunks in interval format (exon_ival); tab-delimited output tuple columns: 1. Reference name (RNAME in SAM format) + ';' + bin number 2. Sample index 3. EC start (inclusive) on forward strand 4. EC end (exclusive) on forward strand Exonic chunks in diff format (exon_diff) -- only if primary alignment is present among alignments to be reported; tab-delimited output tuple columns: 1. Reference name (RNAME in SAM format) + ';' + bin number 2. Sample index 3. Position at which diff should be subtracted or added to coverage 4. '1' if alignment from which diff originates is "unique" according to --tie-margin criterion; else '0' 5. +1 or -1 * count, the number of instances of a read sequence for which to print exonic chunks Introns (intron_bed) / insertions/deletions (indel_bed); tab-delimited output tuple columns: 1. 'I', 'D', or 'N' for insertion, deletion, or intron line 2. Number string representing RNAME 3. Start position (Last base before insertion, first base of deletion, or first base of intron) 4. End position (Last base before insertion, last base of deletion (exclusive), or last base of intron (exclusive)) 5. '+' or '-' indicating which strand is the sense strand for introns, inserted sequence for insertions, or deleted sequence for deletions 6. Sample index ----Next fields are for introns only; they are '\x1c' for indels--- 7. 
Number of nucleotides between 5' end of intron and 5' end of read from which it was inferred, ASSUMING THE SENSE STRAND IS THE FORWARD STRAND. That is, if the sense strand is the reverse strand, this is the distance between the 3' end of the read and the 3' end of the intron. 8. Number of nucleotides between 3' end of intron and 3' end of read from which it was inferred, ASSUMING THE SENSE STRAND IS THE FORWARD STRAND. ------------------------------------------------------------------- 9. Number of instances of intron, insertion, or deletion in sample; this is always +1 * count before bed_pre combiner/reducer multiread_reports_and_ties: either: 1) 2-tuple whose second element is a list of "tied" alignments and whose first element is a list of "resolved" alignments that are ready to be output as secondary SAM lines; tied alignments will be resolved by determining primary in a subsequent step. No alignment is primary yet. 2) Tuple whose sole element is a list of resolved alignments, where one alignment is a primary. Every QNAME takes the form <original_qname> + '\x1d' + <short hash of original_qname + seq + sample label> + '\x1d' + <sample_label> manifest_object: object of type LabelsAndIndices; see manifest.py reference_index: object of type BowtieIndexReference; see bowtie.py output_stream: where to print output exon_ivals: True iff exon_ivals should be output exon_diffs: True iff exon_diffs should be output count: number of alignments for which to output exon_ivals, exon_diffs, indels, and introns Return value: output line count """ output_line_count = 0 try: primary_flag = int(multiread_reports_and_ties[0][0][1]) except IndexError: # No alignments to report pass else: sample_index = self.manifest_object.label_to_index[ multiread_reports_and_ties[0][0][0].rpartition('\x1d')[2] ] if count and not (primary_flag & 256): '''First alignment to report is a primary, so output exons, introns, and indels.''' alignment = multiread_reports_and_ties[0][0] cigar = alignment[5] rname = alignment[2] pos = int(alignment[3]) seq = alignment[9] md = [field for field in alignment if field[:5] == 'MD:Z:'][0][5:] insertions, deletions, introns, exons \ = indels_introns_and_exons(cigar, md, pos, seq, drop_deletions=self.drop_deletions) # Output indels for insert_pos, insert_seq in insertions: print >>self.output_stream, ( ('indel_bed\tI\t%s\t%012d\t%012d\t%s\t%s' '\t\x1c\t\x1c\t%d') % (self.reference_index.rname_to_string[rname], insert_pos, insert_pos, insert_seq, sample_index, count) ) output_line_count += 1 for del_pos, del_seq in deletions: print >>self.output_stream, ( ('indel_bed\tD\t%s\t%012d\t%012d\t%s\t%s' '\t\x1c\t\x1c\t%d') % (self.reference_index.rname_to_string[rname], del_pos, del_pos + len(del_seq), del_seq, sample_index, count) ) output_line_count += 1 # Output exonic chunks if self.exon_ivals: for exon_pos, exon_end_pos in exons: partitions = partition.partition( rname, exon_pos, exon_end_pos, self.bin_size ) for partition_id, _, _ in partitions: for i in xrange(count): print >>self.output_stream, \ 'exon_ival\t%s\t%012d\t' \ '%012d\t%s' \ % (partition_id, exon_pos, exon_end_pos, sample_index) output_line_count += 1 if self.exon_diffs: '''Compare arguments of AS:i: and XS:i: to determine whether an alignment is unique.''' if self.unique(alignment): uniqueness = '1' else: uniqueness = '0' for exon_pos, exon_end_pos in exons: partitions = partition.partition( rname, exon_pos, exon_end_pos, self.bin_size ) for (partition_id, partition_start, partition_end) in partitions: assert exon_pos <= 
partition_end # Print increment at interval start print >>self.output_stream, \ 'exon_diff\t%s\t%012d\t%s\t%s\t%d' \ % (partition_id, max(partition_start, exon_pos), sample_index, uniqueness, count) output_line_count += 1 assert exon_end_pos > partition_start if exon_end_pos <= partition_end: '''Print decrement at interval end iff exon ends before partition ends.''' print >>self.output_stream, \ 'exon_diff\t%s\t%012d\t' \ '%s\t%s\t-%d' \ % (partition_id, exon_end_pos, sample_index, uniqueness, count) output_line_count += 1 try: reverse_strand_string = [field for field in alignment if field[:5] == 'XS:A:'][0][5:] except IndexError: # No introns pass else: # Output introns for (intron_pos, intron_end_pos, left_displacement, right_displacement) \ in introns: print >>self.output_stream, ( ('intron_bed\tN\t%s\t%012d\t%012d\t%s\t%s\t' '%d\t%d\t%d') % (self.reference_index.\ rname_to_string[rname], intron_pos, intron_end_pos, reverse_strand_string, sample_index, left_displacement, right_displacement, count) ) output_line_count += 1 # Write SAM output for alignment in multiread_reports_and_ties[0]: print >>self.output_stream, 'sam\t' \ + '\t'.join( (self.sample_and_rname_indexes.index( sample_index, self.reference_index.rname_to_string[ alignment[2] ] ), '%012d' % int(alignment[3]), alignment[0].partition('\x1d')[0], alignment[1]) + alignment[4:] ) try: ties_to_print = multiread_reports_and_ties[1] except IndexError: # No ties pass else: for alignment in ties_to_print: qname = alignment[0] flag = alignment[1] cigar = alignment[5] rname = alignment[2] pos = int(alignment[3]) seq = alignment[9] md = [field for field in alignment if field[:5] == 'MD:Z:'][0][5:] insertions, deletions, introns, exons \ = indels_introns_and_exons(cigar, md, pos, seq, drop_deletions=self.drop_deletions) try: sense = [field[5:] for field in alignment if field[:5] == 'XS:A:'][0] except IndexError: pass if introns: for intron in introns: print >>self.output_stream, ( ('sam_intron_ties\tN\t%s\t' '%012d\t%012d\t%s\t%s\t_' '\t%012d\t%s\t%s\t') % ( self.reference_index.rname_to_string[ rname ], intron[0], intron[1], sense, self.manifest_object.label_to_index[ qname.rpartition('\x1d')[2] ], pos, qname, flag) ) + '\t'.join(alignment[4:]) else: print >>self.output_stream, '\t'.join(('sam_clip_ties',) \ + alignment) return output_line_count
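# Hedged sketch of the genomic binning helper used above. The docstring says a partition
# id is the reference name + ';' + bin number; this sketch assumes fixed-size, half-open
# bins of length bin_size along the reference and yields (partition_id, bin_start,
# bin_end) for every bin overlapping [pos, end_pos).
def partition(rname, pos, end_pos, bin_size):
    for bin_index in range(pos // bin_size, (end_pos - 1) // bin_size + 1):
        bin_start = bin_index * bin_size
        yield ('%s;%d' % (rname, bin_index), bin_start, bin_start + bin_size)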
def run_analysis(filename, keyword_file, print_all = True): partitioned_tweets = partition.partition(filename, keyword_file) analyzed_tweets = {} for candidate, tweets in partitioned_tweets.items(): analyzed_tweets[candidate] = sentiment.run_sentiment_analysis(tweets, 'words') predict.predict(analyzed_tweets, print_all)
def __init__(self, path): self.path = path self.partitions = {} for i in self.__getPartitions(): self.partitions[i] = partition(self, i)
def quicksort(L, p, r): if p < r: k = partition(L, p, r) quicksort(L, p, k-1) quicksort(L, k+1, r)