Example #1
0
class Ops2:
    """Timing cases for division, modulo and dot-product operations.

    Each ``time_*`` method evaluates a single expression on the fixtures
    created by ``setup`` and throws the result away.
    """

    def setup(self):
        """Build the random square frames and the series the cases use."""
        side = 10**3
        shape = (side, side)
        int16 = np.iinfo(np.int16)

        # Two float frames filled with standard-normal samples.
        self.df = DataFrame(np.random.randn(*shape))
        self.df2 = DataFrame(np.random.randn(*shape))

        # Two integer frames with values bounded by the int16 limits.
        self.df_int = DataFrame(
            np.random.randint(int16.min, int16.max, size=shape))
        self.df2_int = DataFrame(
            np.random.randint(int16.min, int16.max, size=shape))

        # One float series whose length matches the frame side.
        self.s = Series(np.random.randn(side))

    # Division

    def time_frame_float_div(self):
        """Floor-divide (``//``) one float frame by the other."""
        self.df // self.df2

    def time_frame_float_div_by_zero(self):
        """True-divide the float frame by the scalar zero."""
        self.df / 0

    def time_frame_float_floor_by_zero(self):
        """Floor-divide the float frame by the scalar zero."""
        self.df // 0

    def time_frame_int_div_by_zero(self):
        """True-divide the integer frame by the scalar zero."""
        self.df_int / 0

    # Modulo

    def time_frame_int_mod(self):
        """Take the element-wise modulo of the two integer frames."""
        self.df_int % self.df2_int

    def time_frame_float_mod(self):
        """Take the element-wise modulo of the two float frames."""
        self.df % self.df2

    # Dot product

    def time_frame_dot(self):
        """Matrix-multiply the two float frames."""
        self.df.dot(self.df2)

    def time_series_dot(self):
        """Take the inner product of the series with itself."""
        self.s.dot(self.s)

    def time_frame_series_dot(self):
        """Multiply the float frame by the series."""
        self.df.dot(self.s)
def numpy_dot():
    '''
    Build a table of points earned by each country at the Sochi olympics.

    Each country is awarded 4 points for each gold medal, 2 points for each
    silver medal, and one point for each bronze medal.  The totals are the
    dot product of the (gold, silver, bronze) medal-count table with the
    weight vector [4, 2, 1].

    The medal-count table is printed to standard output before the points
    are computed.

    Returns
    -------
    DataFrame
        'olympic_points_df', which includes:
            a) a column called 'country_name' with the country name
            b) a column called 'points' with the total number of points the
               country earned at the Sochi olympics.
    '''

    countries = ['Russian Fed.', 'Norway', 'Canada', 'United States',
                 'Netherlands', 'Germany', 'Switzerland', 'Belarus',
                 'Austria', 'France', 'Poland', 'China', 'Korea',
                 'Sweden', 'Czech Republic', 'Slovenia', 'Japan',
                 'Finland', 'Great Britain', 'Ukraine', 'Slovakia',
                 'Italy', 'Latvia', 'Australia', 'Croatia', 'Kazakhstan']

    gold = [13, 11, 10, 9, 8, 8, 6, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
    silver = [11, 5, 10, 7, 7, 6, 3, 0, 8, 4, 1, 4, 3, 7, 4, 2, 4, 3, 1, 0, 0, 2, 2, 2, 1, 0]
    bronze = [9, 10, 5, 12, 9, 5, 2, 1, 5, 7, 1, 2, 2, 6, 2, 4, 3, 1, 2, 1, 0, 6, 2, 1, 0, 1]

    # The column order must match the order of the weights below, so it is
    # fixed explicitly instead of relying on dict ordering.
    medal_columns = ['gold', 'silver', 'bronze']
    points_per_medal = [4, 2, 1]

    medal_counts = {'gold': Series(gold), 'silver': Series(silver),
                    'bronze': Series(bronze)}
    df = DataFrame(medal_counts, columns=medal_columns)
    # Parenthesised form prints the same table on Python 2 and Python 3.
    print(df)

    # DataFrame.dot already returns a Series indexed like df.
    points = df.dot(points_per_medal)
    olympic_points_df = DataFrame(
        {'country_name': Series(countries), 'points': points},
        columns=['country_name', 'points'])
    return olympic_points_df
def metabolite_distance(model, model_biomass=None, drop_metabolites=None):
    """Return the all-pairs shortest-path lengths between metabolites.

    The stoichiometric matrix is reduced to a 0/1 participation matrix ``V``
    and multiplied by its transpose, so that entry (i, j) of ``M = V . V^T``
    is non-zero when metabolites i and j take part in a common reaction.
    ``M`` is then used as the adjacency matrix of a directed graph on which
    Dijkstra path lengths are computed for every pair of nodes.

    NOTE(review): the graph is built straight from ``M``, so each non-zero
    entry is presumably used as the edge weight; path lengths would then be
    weighted by shared-reaction counts rather than being hop counts --
    confirm this is intended.

    Parameters
    ----------
    model : object
        Must provide ``get_stoichiometric_matrix()`` and
        ``get_exchanges(check_matrix=True)``.  The matrix is used below as a
        DataFrame with metabolites as rows and reactions as columns.
    model_biomass : optional
        Label of the biomass reaction column to drop together with the
        exchange reactions.  Ignored when falsy.
    drop_metabolites : optional
        Row labels (metabolites) to remove before building the graph.
        ``None`` (the default) removes nothing.

    Returns
    -------
    DataFrame
        Square frame whose rows and columns are both labelled with the
        remaining metabolites.  Pairs with no path are NaN.
    """
    # Calculate metabolite-reaction distance matrix
    V = model.get_stoichiometric_matrix()

    # Remove exchange and (when given) biomass reactions
    excluded_reactions = model.get_exchanges(check_matrix=True)
    if model_biomass:
        excluded_reactions = np.append(excluded_reactions, model_biomass)
    V = V.drop(excluded_reactions, axis=1)

    # Convert every non-zero stoichiometric value to 1, keeping zeros as-is
    V = V.where(V == 0, 1)

    # Remove highly connected metabolites.  DataFrame.drop(None) raises, so
    # the default of "nothing to drop" has to skip the call altogether.
    if drop_metabolites is not None:
        V = V.drop(drop_metabolites)

    # Multiply stoichiometric matrix by its transpose
    M = V.dot(V.T).abs()

    # Get shortest path lengths for all metabolites.  dict() accepts both a
    # ready-made mapping and an iterator of (node, lengths) pairs, which is
    # what newer networkx releases return.
    graph = nx.from_numpy_matrix(M.values, create_using=nx.DiGraph())
    lengths = dict(nx.all_pairs_dijkstra_path_length(graph))

    # Graph nodes are the positions 0..n-1 of M.  Put rows and columns in
    # that order explicitly before relabelling, because the row order of a
    # frame built from nested dicts is not guaranteed to be sorted.
    positions = range(len(M.index))
    G = DataFrame(lengths).reindex(index=positions, columns=positions)
    G.index = M.index
    G.columns = M.index

    return G
Example #4
0
class GradeBook(object):
    """A class encapsulating a pandas DataFrame and meant to store
    the grades for a whole class. It provides the method compute_total_grades
    that computes the total grade for each student according to the weights
    provided by the caller.
    """

    def __init__(self, grade_arr, student_ids, item_list, max_scores):
        """
        Constructor of the class grade frame.

        It sets the following attributes:

        (1) self.raw_grades, which is a DataFrame with
                - row labels given by student_ids
                - column labels given by item_list
                - values given by grade_arr

        (2) self.total_grades, set to None

        (3) self.letter_grades, set to None

        (4) self.max_scores, set to max_scores

        Parameters
        ----------
        grade_arr : numpy array of grades as returned by simulate_grades

        student_ids: a list of student ids

        item_list: a list of grade items (e.g. ['HW', 'M', 'F'])

        max_scores: a list of the maximum possible score for each grade item

        Returns
        -------
        nothing

        Examples
        --------
        >>> a = GradeBook(array([[1,2],[3,4]]),['22','34'],['F','M'],[30, 50])
        >>> a.letter_grades is None
        True
        >>> a.total_grades is None
        True
        >>> a.raw_grades.shape == (2,2)
        True
        >>> a.raw_grades.iloc[0,0] == 1
        True
        >>> a.max_scores[0] == 30
        True
        """
        self.raw_grades = DataFrame(grade_arr, index=student_ids, columns=item_list)
        self.total_grades = None
        self.letter_grades = None
        self.max_scores = max_scores

    def compute_total_grades(self, item_weights=None, max_score=100):
        """
        Compute student total class grades as a weighted average of the
        columns in self.raw_grades according to the weights passed to
        item_weights for each of the columns.

        Each grade item is first divided by its own maximum possible score
        (self.max_scores), the resulting fractions are combined with
        item_weights, and the weighted average is rescaled to max_score.
        The student total class grades are then stored in the Series
        attribute self.total_grades.

        Parameters
        ----------
        item_weights: list of floats summing up to one
            List of weights to be applied to each grade item
            (e.g. [0.3, 0.4, 0.3]), in the order of the columns of
            self.raw_grades. When None, every item gets the same weight.

        max_score: float
            Maximal possible score for the total class grade

        Returns
        -------
        out : Series
            A Series containing a numerical summary of the total
            grade distribution stored by the function in the attribute
            self.total_grades; this Series is the output of the Series
            method describe.

        Examples
        --------
        >>> a = GradeBook(array([[5,5],[1,1]]),['22','34'],['F','M'],[10, 10])
        >>> b = a.compute_total_grades([0.5, 0.5], 100)
        >>> len(b) == 8
        True
        >>> a.total_grades['22'] == 50
        True
        >>> a.total_grades['34'] == 10
        True
        """
        items = self.raw_grades.columns
        if item_weights is None:
            n_items = len(items)
            item_weights = [1.0 / n_items] * n_items if n_items else []

        weights = pd.Series(item_weights, index=items)
        # max_scores holds one entry per grade item, so it is labelled by
        # the columns (items) and not by the student ids.
        item_max = pd.Series(self.max_scores, index=items)

        # Fraction of the attainable score per item, aligned column by column.
        fractions = self.raw_grades.div(item_max, axis='columns')
        self.total_grades = fractions.dot(weights) * max_score
        return self.total_grades.describe()